# Install required pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install matplotlib
!{sys.executable} -m pip install seaborn
Requirement already satisfied: numpy in c:\users\thari\anaconda3\lib\site-packages (1.20.1) Requirement already satisfied: pandas in c:\users\thari\anaconda3\lib\site-packages (1.2.4) Requirement already satisfied: pytz>=2017.3 in c:\users\thari\anaconda3\lib\site-packages (from pandas) (2021.1) Requirement already satisfied: numpy>=1.16.5 in c:\users\thari\anaconda3\lib\site-packages (from pandas) (1.20.1) Requirement already satisfied: python-dateutil>=2.7.3 in c:\users\thari\anaconda3\lib\site-packages (from pandas) (2.8.1) Requirement already satisfied: six>=1.5 in c:\users\thari\anaconda3\lib\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0) Requirement already satisfied: matplotlib in c:\users\thari\anaconda3\lib\site-packages (3.5.1) Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib) (1.3.1) Requirement already satisfied: pillow>=6.2.0 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib) (8.2.0) Requirement already satisfied: packaging>=20.0 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib) (20.9) Requirement already satisfied: cycler>=0.10 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib) (0.10.0) Requirement already satisfied: fonttools>=4.22.0 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib) (4.30.0) Requirement already satisfied: python-dateutil>=2.7 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib) (2.8.1) Requirement already satisfied: numpy>=1.17 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib) (1.20.1) Requirement already satisfied: pyparsing>=2.2.1 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib) (2.4.7) Requirement already satisfied: six in c:\users\thari\anaconda3\lib\site-packages (from cycler>=0.10->matplotlib) (1.15.0) Requirement already satisfied: seaborn in c:\users\thari\anaconda3\lib\site-packages (0.11.1) Requirement already satisfied: matplotlib>=2.2 in 
c:\users\thari\anaconda3\lib\site-packages (from seaborn) (3.5.1) Requirement already satisfied: numpy>=1.15 in c:\users\thari\anaconda3\lib\site-packages (from seaborn) (1.20.1) Requirement already satisfied: scipy>=1.0 in c:\users\thari\anaconda3\lib\site-packages (from seaborn) (1.6.2) Requirement already satisfied: pandas>=0.23 in c:\users\thari\anaconda3\lib\site-packages (from seaborn) (1.2.4) Requirement already satisfied: python-dateutil>=2.7 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (2.8.1) Requirement already satisfied: fonttools>=4.22.0 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (4.30.0) Requirement already satisfied: packaging>=20.0 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (20.9) Requirement already satisfied: pyparsing>=2.2.1 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (2.4.7) Requirement already satisfied: pillow>=6.2.0 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (8.2.0) Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (1.3.1) Requirement already satisfied: cycler>=0.10 in c:\users\thari\anaconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (0.10.0) Requirement already satisfied: six in c:\users\thari\anaconda3\lib\site-packages (from cycler>=0.10->matplotlib>=2.2->seaborn) (1.15.0) Requirement already satisfied: pytz>=2017.3 in c:\users\thari\anaconda3\lib\site-packages (from pandas>=0.23->seaborn) (2021.1)
The following packages would need to be imported for use:
Numpy - For mathematical processing and array handling
Pandas - For handling the datasets as dataframes and carrying out related operations
Matplotlib and Seaborn - For visualisation and plotting of data
#import required packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
The target and input datasets are loaded as Pandas DataFrames. The dataset paths are written as raw strings (an "r" prefix before the opening quote) so that any backslashes in a path, as in Windows paths, are not interpreted as escape sequences.
# load the input features; every column name is normalised to lower case
inputDataset = pd.read_csv(r'input_features.csv').rename(columns=str.lower)
# load the target values using the same lower-case column convention
targetDataset = pd.read_csv(r'target_values.csv').rename(columns=str.lower)
# preview the first 8 rows of the input features dataframe
inputDataset.head(n=8)
| building_id | geo_level_1_id | geo_level_2_id | geo_level_3_id | count_floors_pre_eq | age | area_percentage | height_percentage | land_surface_condition | foundation_type | ... | has_secondary_use_agriculture | has_secondary_use_hotel | has_secondary_use_rental | has_secondary_use_institution | has_secondary_use_school | has_secondary_use_industry | has_secondary_use_health_post | has_secondary_use_gov_office | has_secondary_use_use_police | has_secondary_use_other | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 802906 | 6 | 487 | 12198 | 2 | 30 | 6 | 5 | t | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 28830 | 8 | 900 | 2812 | 2 | 10 | 8 | 7 | o | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 94947 | 21 | 363 | 8973 | 2 | 10 | 5 | 5 | t | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 590882 | 22 | 418 | 10694 | 2 | 10 | 6 | 5 | t | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 201944 | 11 | 131 | 1488 | 3 | 30 | 8 | 9 | t | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 5 | 333020 | 8 | 558 | 6089 | 2 | 10 | 9 | 5 | t | r | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 6 | 728451 | 9 | 475 | 12066 | 2 | 25 | 3 | 4 | n | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 7 | 475515 | 20 | 323 | 12236 | 2 | 0 | 8 | 6 | t | w | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
8 rows × 39 columns
# preview the last 8 rows of the input features dataframe
inputDataset.tail(n=8)
| building_id | geo_level_1_id | geo_level_2_id | geo_level_3_id | count_floors_pre_eq | age | area_percentage | height_percentage | land_surface_condition | foundation_type | ... | has_secondary_use_agriculture | has_secondary_use_hotel | has_secondary_use_rental | has_secondary_use_institution | has_secondary_use_school | has_secondary_use_industry | has_secondary_use_health_post | has_secondary_use_gov_office | has_secondary_use_use_police | has_secondary_use_other | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 260593 | 226421 | 8 | 767 | 8613 | 2 | 5 | 13 | 5 | t | r | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 260594 | 159555 | 27 | 181 | 1537 | 6 | 0 | 13 | 12 | t | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 260595 | 827012 | 8 | 268 | 4718 | 2 | 20 | 8 | 5 | t | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 260596 | 688636 | 25 | 1335 | 1621 | 1 | 55 | 6 | 3 | n | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 260597 | 669485 | 17 | 715 | 2060 | 2 | 0 | 6 | 5 | t | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 260598 | 602512 | 17 | 51 | 8163 | 3 | 55 | 6 | 7 | t | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 260599 | 151409 | 26 | 39 | 1851 | 2 | 10 | 14 | 6 | t | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 260600 | 747594 | 21 | 9 | 9101 | 3 | 10 | 7 | 6 | n | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
8 rows × 39 columns
# preview the first 8 rows of the target dataframe
targetDataset.head(n=8)
| building_id | damage_grade | |
|---|---|---|
| 0 | 802906 | 3 |
| 1 | 28830 | 2 |
| 2 | 94947 | 3 |
| 3 | 590882 | 2 |
| 4 | 201944 | 3 |
| 5 | 333020 | 2 |
| 6 | 728451 | 3 |
| 7 | 475515 | 1 |
# preview the last 8 rows of the target dataframe
targetDataset.tail(n=8)
| building_id | damage_grade | |
|---|---|---|
| 260593 | 226421 | 2 |
| 260594 | 159555 | 2 |
| 260595 | 827012 | 3 |
| 260596 | 688636 | 2 |
| 260597 | 669485 | 3 |
| 260598 | 602512 | 3 |
| 260599 | 151409 | 2 |
| 260600 | 747594 | 3 |
The descriptive statistical characteristics of the input and target datasets are explored. The argument `include="all"` is used for the input dataset in order to include data from the categorical columns and not just the numerical ones.
# descriptive statistics for every input column; include="all" also summarises the categorical columns
input_summary = inputDataset.describe(include="all")
input_summary
| building_id | geo_level_1_id | geo_level_2_id | geo_level_3_id | count_floors_pre_eq | age | area_percentage | height_percentage | land_surface_condition | foundation_type | ... | has_secondary_use_agriculture | has_secondary_use_hotel | has_secondary_use_rental | has_secondary_use_institution | has_secondary_use_school | has_secondary_use_industry | has_secondary_use_health_post | has_secondary_use_gov_office | has_secondary_use_use_police | has_secondary_use_other | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2.606010e+05 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601 | 260601 | ... | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 |
| unique | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3 | 5 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| top | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | t | r | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| freq | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 216757 | 219196 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| mean | 5.256755e+05 | 13.900353 | 701.074685 | 6257.876148 | 2.129723 | 26.535029 | 8.018051 | 5.434365 | NaN | NaN | ... | 0.064378 | 0.033626 | 0.008101 | 0.000940 | 0.000361 | 0.001071 | 0.000188 | 0.000146 | 0.000088 | 0.005119 |
| std | 3.045450e+05 | 8.033617 | 412.710734 | 3646.369645 | 0.727665 | 73.565937 | 4.392231 | 1.918418 | NaN | NaN | ... | 0.245426 | 0.180265 | 0.089638 | 0.030647 | 0.018989 | 0.032703 | 0.013711 | 0.012075 | 0.009394 | 0.071364 |
| min | 4.000000e+00 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 1.000000 | 2.000000 | NaN | NaN | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 2.611900e+05 | 7.000000 | 350.000000 | 3073.000000 | 2.000000 | 10.000000 | 5.000000 | 4.000000 | NaN | NaN | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 50% | 5.257570e+05 | 12.000000 | 702.000000 | 6270.000000 | 2.000000 | 15.000000 | 7.000000 | 5.000000 | NaN | NaN | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 75% | 7.897620e+05 | 21.000000 | 1050.000000 | 9412.000000 | 2.000000 | 30.000000 | 9.000000 | 6.000000 | NaN | NaN | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| max | 1.052934e+06 | 30.000000 | 1427.000000 | 12567.000000 | 9.000000 | 995.000000 | 100.000000 | 32.000000 | NaN | NaN | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
11 rows × 39 columns
# descriptive statistics of the numeric columns of the target dataset
target_summary = targetDataset.describe()
target_summary
| building_id | damage_grade | |
|---|---|---|
| count | 2.606010e+05 | 260601.000000 |
| mean | 5.256755e+05 | 2.238272 |
| std | 3.045450e+05 | 0.611814 |
| min | 4.000000e+00 | 1.000000 |
| 25% | 2.611900e+05 | 2.000000 |
| 50% | 5.257570e+05 | 2.000000 |
| 75% | 7.897620e+05 | 3.000000 |
| max | 1.052934e+06 | 3.000000 |
From the tables above it can be seen that all columns contain the same number (260601) of elements of data.
Initial observations: 1) The geo_level_3_id column has a very large standard deviation compared to the other columns, but this is because the column contains larger values as a whole.
2) The average age of the buildings in the dataset is approximately 26 years, and the buildings have an average of about 2 floors. However, the building age has a large standard deviation of roughly 73 years.
3) The max value in the age column is 995, which may indicate an extremely old building or an error/outlier.
The input and target datasets need to be merged together for ease of processing, and this can be achieved using the common primary key "building_id". Categorical variables are then encoded into sets of binary columns, one for each category value. The presence of null, duplicate and other aberrant data is also explored.
# make sure both datasets have the same number of rows before merging
(len(inputDataset), len(targetDataset))
(260601, 260601)
# Merge the input and target datasets on the shared key "building_id".
# how='outer' keeps rows that are missing from either side, so the NaN checks
# performed on the encoded dataset reveal any building absent from one file.
# validate='one_to_one' makes pandas raise a MergeError if a building_id is
# repeated in either frame instead of silently multiplying rows.
mergedDataset = pd.merge(inputDataset, targetDataset, on='building_id',
                         how='outer', validate='one_to_one')
mergedDataset.head()
| building_id | geo_level_1_id | geo_level_2_id | geo_level_3_id | count_floors_pre_eq | age | area_percentage | height_percentage | land_surface_condition | foundation_type | ... | has_secondary_use_hotel | has_secondary_use_rental | has_secondary_use_institution | has_secondary_use_school | has_secondary_use_industry | has_secondary_use_health_post | has_secondary_use_gov_office | has_secondary_use_use_police | has_secondary_use_other | damage_grade | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 802906 | 6 | 487 | 12198 | 2 | 30 | 6 | 5 | t | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 |
| 1 | 28830 | 8 | 900 | 2812 | 2 | 10 | 8 | 7 | o | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 |
| 2 | 94947 | 21 | 363 | 8973 | 2 | 10 | 5 | 5 | t | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 |
| 3 | 590882 | 22 | 418 | 10694 | 2 | 10 | 6 | 5 | t | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 |
| 4 | 201944 | 11 | 131 | 1488 | 3 | 30 | 8 | 9 | t | r | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 |
5 rows × 40 columns
# Check the data type and non-null count of every column in the merged dataset
mergedDataset.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 260601 entries, 0 to 260600 Data columns (total 40 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 building_id 260601 non-null int64 1 geo_level_1_id 260601 non-null int64 2 geo_level_2_id 260601 non-null int64 3 geo_level_3_id 260601 non-null int64 4 count_floors_pre_eq 260601 non-null int64 5 age 260601 non-null int64 6 area_percentage 260601 non-null int64 7 height_percentage 260601 non-null int64 8 land_surface_condition 260601 non-null object 9 foundation_type 260601 non-null object 10 roof_type 260601 non-null object 11 ground_floor_type 260601 non-null object 12 other_floor_type 260601 non-null object 13 position 260601 non-null object 14 plan_configuration 260601 non-null object 15 has_superstructure_adobe_mud 260601 non-null int64 16 has_superstructure_mud_mortar_stone 260601 non-null int64 17 has_superstructure_stone_flag 260601 non-null int64 18 has_superstructure_cement_mortar_stone 260601 non-null int64 19 has_superstructure_mud_mortar_brick 260601 non-null int64 20 has_superstructure_cement_mortar_brick 260601 non-null int64 21 has_superstructure_timber 260601 non-null int64 22 has_superstructure_bamboo 260601 non-null int64 23 has_superstructure_rc_non_engineered 260601 non-null int64 24 has_superstructure_rc_engineered 260601 non-null int64 25 has_superstructure_other 260601 non-null int64 26 legal_ownership_status 260601 non-null object 27 count_families 260601 non-null int64 28 has_secondary_use 260601 non-null int64 29 has_secondary_use_agriculture 260601 non-null int64 30 has_secondary_use_hotel 260601 non-null int64 31 has_secondary_use_rental 260601 non-null int64 32 has_secondary_use_institution 260601 non-null int64 33 has_secondary_use_school 260601 non-null int64 34 has_secondary_use_industry 260601 non-null int64 35 has_secondary_use_health_post 260601 non-null int64 36 has_secondary_use_gov_office 260601 non-null int64 37 has_secondary_use_use_police 260601 
non-null int64 38 has_secondary_use_other 260601 non-null int64 39 damage_grade 260601 non-null int64 dtypes: int64(32), object(8) memory usage: 81.5+ MB
# collect the categorical (object-typed) columns of the merged dataset and list their names
categorical_values = mergedDataset.select_dtypes(include='object')
print("Categorical Variables",
      "-------------------------------------------------",
      sep="\n")
categorical_values.columns
Categorical Variables -------------------------------------------------
Index(['land_surface_condition', 'foundation_type', 'roof_type',
'ground_floor_type', 'other_floor_type', 'position',
'plan_configuration', 'legal_ownership_status'],
dtype='object')
# collect the integer (int64) columns of the merged dataset and list their names
numeric_values = mergedDataset.select_dtypes(include='int64')
print("Numeric Variables",
      "-------------------------------------------------",
      sep="\n")
numeric_values.columns
Numeric Variables -------------------------------------------------
Index(['building_id', 'geo_level_1_id', 'geo_level_2_id', 'geo_level_3_id',
'count_floors_pre_eq', 'age', 'area_percentage', 'height_percentage',
'has_superstructure_adobe_mud', 'has_superstructure_mud_mortar_stone',
'has_superstructure_stone_flag',
'has_superstructure_cement_mortar_stone',
'has_superstructure_mud_mortar_brick',
'has_superstructure_cement_mortar_brick', 'has_superstructure_timber',
'has_superstructure_bamboo', 'has_superstructure_rc_non_engineered',
'has_superstructure_rc_engineered', 'has_superstructure_other',
'count_families', 'has_secondary_use', 'has_secondary_use_agriculture',
'has_secondary_use_hotel', 'has_secondary_use_rental',
'has_secondary_use_institution', 'has_secondary_use_school',
'has_secondary_use_industry', 'has_secondary_use_health_post',
'has_secondary_use_gov_office', 'has_secondary_use_use_police',
'has_secondary_use_other', 'damage_grade'],
dtype='object')
# show every column when a dataframe is displayed (the encoded frame is wide)
pd.set_option('display.max_columns', None)
# one-hot encode the categorical columns: each category value becomes its own
# indicator column, and no level is dropped (pandas' default drop_first=False)
finalDataset = pd.get_dummies(mergedDataset)
finalDataset.head()
| building_id | geo_level_1_id | geo_level_2_id | geo_level_3_id | count_floors_pre_eq | age | area_percentage | height_percentage | has_superstructure_adobe_mud | has_superstructure_mud_mortar_stone | has_superstructure_stone_flag | has_superstructure_cement_mortar_stone | has_superstructure_mud_mortar_brick | has_superstructure_cement_mortar_brick | has_superstructure_timber | has_superstructure_bamboo | has_superstructure_rc_non_engineered | has_superstructure_rc_engineered | has_superstructure_other | count_families | has_secondary_use | has_secondary_use_agriculture | has_secondary_use_hotel | has_secondary_use_rental | has_secondary_use_institution | has_secondary_use_school | has_secondary_use_industry | has_secondary_use_health_post | has_secondary_use_gov_office | has_secondary_use_use_police | has_secondary_use_other | damage_grade | land_surface_condition_n | land_surface_condition_o | land_surface_condition_t | foundation_type_h | foundation_type_i | foundation_type_r | foundation_type_u | foundation_type_w | roof_type_n | roof_type_q | roof_type_x | ground_floor_type_f | ground_floor_type_m | ground_floor_type_v | ground_floor_type_x | ground_floor_type_z | other_floor_type_j | other_floor_type_q | other_floor_type_s | other_floor_type_x | position_j | position_o | position_s | position_t | plan_configuration_a | plan_configuration_c | plan_configuration_d | plan_configuration_f | plan_configuration_m | plan_configuration_n | plan_configuration_o | plan_configuration_q | plan_configuration_s | plan_configuration_u | legal_ownership_status_a | legal_ownership_status_r | legal_ownership_status_v | legal_ownership_status_w | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 802906 | 6 | 487 | 12198 | 2 | 30 | 6 | 5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 28830 | 8 | 900 | 2812 | 2 | 10 | 8 | 7 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 94947 | 21 | 363 | 8973 | 2 | 10 | 5 | 5 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | 590882 | 22 | 418 | 10694 | 2 | 10 | 6 | 5 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4 | 201944 | 11 | 131 | 1488 | 3 | 30 | 8 | 9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
# count the NaN entries in every column of the final dataset
nan_counts = finalDataset.isna().sum()
nan_counts
building_id 0
geo_level_1_id 0
geo_level_2_id 0
geo_level_3_id 0
count_floors_pre_eq 0
..
plan_configuration_u 0
legal_ownership_status_a 0
legal_ownership_status_r 0
legal_ownership_status_v 0
legal_ownership_status_w 0
Length: 70, dtype: int64
# count the null entries per column (isnull() is the pandas alias of isna(), so this mirrors the NaN check)
null_counts = finalDataset.isnull().sum()
null_counts
building_id 0
geo_level_1_id 0
geo_level_2_id 0
geo_level_3_id 0
count_floors_pre_eq 0
..
plan_configuration_u 0
legal_ownership_status_a 0
legal_ownership_status_r 0
legal_ownership_status_v 0
legal_ownership_status_w 0
Length: 70, dtype: int64
# single True/False answer: does the final dataset contain a null value anywhere?
finalDataset.isnull().to_numpy().any()
False
# count the fully duplicated rows in the final dataset
# Series.sum() is vectorised, whereas the builtin sum() walks every row in Python;
# int() keeps the displayed result a plain Python integer
int(finalDataset.duplicated().sum())
0
The descriptive features of the final dataset are explored. This is similar to section 2.2, but is used to ensure that no values have been accidentally modified during the merge.
#descriptive characteristics of the final dataset
finalDataset.describe()
| building_id | geo_level_1_id | geo_level_2_id | geo_level_3_id | count_floors_pre_eq | age | area_percentage | height_percentage | has_superstructure_adobe_mud | has_superstructure_mud_mortar_stone | has_superstructure_stone_flag | has_superstructure_cement_mortar_stone | has_superstructure_mud_mortar_brick | has_superstructure_cement_mortar_brick | has_superstructure_timber | has_superstructure_bamboo | has_superstructure_rc_non_engineered | has_superstructure_rc_engineered | has_superstructure_other | count_families | has_secondary_use | has_secondary_use_agriculture | has_secondary_use_hotel | has_secondary_use_rental | has_secondary_use_institution | has_secondary_use_school | has_secondary_use_industry | has_secondary_use_health_post | has_secondary_use_gov_office | has_secondary_use_use_police | has_secondary_use_other | damage_grade | land_surface_condition_n | land_surface_condition_o | land_surface_condition_t | foundation_type_h | foundation_type_i | foundation_type_r | foundation_type_u | foundation_type_w | roof_type_n | roof_type_q | roof_type_x | ground_floor_type_f | ground_floor_type_m | ground_floor_type_v | ground_floor_type_x | ground_floor_type_z | other_floor_type_j | other_floor_type_q | other_floor_type_s | other_floor_type_x | position_j | position_o | position_s | position_t | plan_configuration_a | plan_configuration_c | plan_configuration_d | plan_configuration_f | plan_configuration_m | plan_configuration_n | plan_configuration_o | plan_configuration_q | plan_configuration_s | plan_configuration_u | legal_ownership_status_a | legal_ownership_status_r | legal_ownership_status_v | legal_ownership_status_w | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2.606010e+05 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.00000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 | 260601.000000 |
| mean | 5.256755e+05 | 13.900353 | 701.074685 | 6257.876148 | 2.129723 | 26.535029 | 8.018051 | 5.434365 | 0.088645 | 0.761935 | 0.034332 | 0.018235 | 0.068154 | 0.075268 | 0.254988 | 0.085011 | 0.042590 | 0.015859 | 0.014985 | 0.983949 | 0.111880 | 0.064378 | 0.033626 | 0.008101 | 0.000940 | 0.000361 | 0.001071 | 0.000188 | 0.000146 | 0.000088 | 0.005119 | 2.238272 | 0.136331 | 0.031911 | 0.831758 | 0.005556 | 0.040595 | 0.841117 | 0.054720 | 0.058012 | 0.701617 | 0.236285 | 0.062099 | 0.804368 | 0.001949 | 0.094370 | 0.09546 | 0.003853 | 0.152889 | 0.634234 | 0.046155 | 0.166722 | 0.050967 | 0.008952 | 0.775477 | 0.164604 | 0.000967 | 0.001247 | 0.959597 | 0.000084 | 0.000177 | 0.000146 | 0.000610 | 0.021842 | 0.001328 | 0.014002 | 0.021151 | 0.005652 | 0.962924 | 0.010272 |
| std | 3.045450e+05 | 8.033617 | 412.710734 | 3646.369645 | 0.727665 | 73.565937 | 4.392231 | 1.918418 | 0.284231 | 0.425900 | 0.182081 | 0.133800 | 0.252010 | 0.263824 | 0.435855 | 0.278899 | 0.201931 | 0.124932 | 0.121491 | 0.418389 | 0.315219 | 0.245426 | 0.180265 | 0.089638 | 0.030647 | 0.018989 | 0.032703 | 0.013711 | 0.012075 | 0.009394 | 0.071364 | 0.611814 | 0.343140 | 0.175763 | 0.374082 | 0.074334 | 0.197350 | 0.365567 | 0.227433 | 0.233767 | 0.457549 | 0.424800 | 0.241335 | 0.396687 | 0.044108 | 0.292344 | 0.29385 | 0.061950 | 0.359881 | 0.481645 | 0.209821 | 0.372728 | 0.219930 | 0.094193 | 0.417269 | 0.370824 | 0.031082 | 0.035293 | 0.196902 | 0.009188 | 0.013285 | 0.012075 | 0.024693 | 0.146167 | 0.036413 | 0.117500 | 0.143888 | 0.074969 | 0.188948 | 0.100831 |
| min | 4.000000e+00 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 1.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 2.611900e+05 | 7.000000 | 350.000000 | 3073.000000 | 2.000000 | 10.000000 | 5.000000 | 4.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 2.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 |
| 50% | 5.257570e+05 | 12.000000 | 702.000000 | 6270.000000 | 2.000000 | 15.000000 | 7.000000 | 5.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 2.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 |
| 75% | 7.897620e+05 | 21.000000 | 1050.000000 | 9412.000000 | 2.000000 | 30.000000 | 9.000000 | 6.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 3.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 |
| max | 1.052934e+06 | 30.000000 | 1427.000000 | 12567.000000 | 9.000000 | 995.000000 | 100.000000 | 32.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 9.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 3.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.00000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
The distribution of some of the numerical continuous variables in the input dataset, along with the distribution and value counts of the damage grades are explored here
# plot the distribution of the key numerical features of the input dataset
interest_features = ['count_floors_pre_eq', 'age', 'area_percentage', 'height_percentage', 'count_families']
# empty frame whose columns are the features of interest; kept under its
# original name because other cells iterate over it
interestFeaturesDataset = pd.DataFrame(columns=interest_features)
# plt.figure (not plt.subplots) so no extra full-size Axes sits underneath the grid
plt.figure(figsize=(20, 16))
for q, col in enumerate(interest_features, start=1):
    plt.subplot(3, 2, q)
    plt.hist(finalDataset[col], bins=10)
    plt.title(col + ' Distribution')
    # the x-axis carries the feature's values, the y-axis the number of buildings
    plt.xlabel(col)
    plt.ylabel('count')
plt.show()
# plot the same features as seaborn histograms with a KDE curve overlaid
# plt.figure (not plt.subplots) so no extra full-size Axes sits underneath the grid
plt.figure(figsize=(20, 16))
for q, col in enumerate(interestFeaturesDataset, start=1):
    ax = plt.subplot(3, 2, q)
    # kde=True overlays the kernel density estimate on each histogram
    sns.histplot(finalDataset[col], kde=True, ax=ax)
    ax.set_title(col + ' Distribution')
    # label the x-axis with the feature whose values it shows
    ax.set_xlabel(col)
plt.show()
From the images above, it can be identified that most of the numerical feature columns follow an approximately normal distribution, with the majority of the values at the lower end of the x-axis and a few outliers at higher values.
It can also be noted that the area_percentage and building age distributions are slightly right-skewed.
# Tally how many buildings fall into each damage grade (most frequent first).
finalDataset.loc[:, 'damage_grade'].value_counts()
2 148259 3 87218 1 25124 Name: damage_grade, dtype: int64
# Pie chart showing the share of buildings in each damage grade.
# Counts are ordered by grade value so they line up with the
# Low / Medium / High labels passed to ax.pie.
gradeCounts = finalDataset['damage_grade'].value_counts().sort_index()
fig, ax = plt.subplots()
ax.pie(gradeCounts, labels=['Low', 'Medium', 'High'], autopct='%1.1f%%')
# Equal aspect ratio so the pie is drawn as a circle.
ax.axis('equal')
plt.show()
From the pie chart above, it can be seen that the majority (56.9%) of the buildings have suffered medium damage, 33.5% have been nearly destroyed, and only 9.6% of the buildings have suffered low damage.
Over 90% of the buildings have suffered medium damage to complete destruction, and this research aims to identify how this number can be reduced.
The correlation between all the different columns, and the correlation between each column and the damage grade, will be explored here. Heatmaps will be used for visualization purposes.
# Compute the pairwise correlation matrix of every column in the dataframe
# (Pearson, which is the pandas default) and display it as a colour-coded
# table using the 'coolwarm' colormap.
corr = finalDataset.corr(method='pearson')
corr.style.background_gradient(cmap='coolwarm')
| building_id | geo_level_1_id | geo_level_2_id | geo_level_3_id | count_floors_pre_eq | age | area_percentage | height_percentage | has_superstructure_adobe_mud | has_superstructure_mud_mortar_stone | has_superstructure_stone_flag | has_superstructure_cement_mortar_stone | has_superstructure_mud_mortar_brick | has_superstructure_cement_mortar_brick | has_superstructure_timber | has_superstructure_bamboo | has_superstructure_rc_non_engineered | has_superstructure_rc_engineered | has_superstructure_other | count_families | has_secondary_use | has_secondary_use_agriculture | has_secondary_use_hotel | has_secondary_use_rental | has_secondary_use_institution | has_secondary_use_school | has_secondary_use_industry | has_secondary_use_health_post | has_secondary_use_gov_office | has_secondary_use_use_police | has_secondary_use_other | damage_grade | land_surface_condition_n | land_surface_condition_o | land_surface_condition_t | foundation_type_h | foundation_type_i | foundation_type_r | foundation_type_u | foundation_type_w | roof_type_n | roof_type_q | roof_type_x | ground_floor_type_f | ground_floor_type_m | ground_floor_type_v | ground_floor_type_x | ground_floor_type_z | other_floor_type_j | other_floor_type_q | other_floor_type_s | other_floor_type_x | position_j | position_o | position_s | position_t | plan_configuration_a | plan_configuration_c | plan_configuration_d | plan_configuration_f | plan_configuration_m | plan_configuration_n | plan_configuration_o | plan_configuration_q | plan_configuration_s | plan_configuration_u | legal_ownership_status_a | legal_ownership_status_r | legal_ownership_status_v | legal_ownership_status_w | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| building_id | 1.000000 | -0.002850 | 0.000347 | -0.000393 | -0.000654 | -0.001476 | -0.002070 | 0.000096 | -0.000307 | 0.002423 | 0.001443 | -0.000827 | -0.003768 | 0.001933 | 0.001967 | 0.001266 | 0.002472 | -0.002259 | 0.002171 | -0.000599 | 0.002606 | 0.002631 | 0.001934 | -0.002152 | 0.000706 | -0.000362 | 0.002348 | -0.000374 | 0.000538 | -0.003116 | -0.002295 | 0.001063 | 0.003831 | -0.001509 | -0.002806 | -0.000571 | -0.000334 | 0.002724 | -0.001303 | -0.002529 | -0.001501 | 0.000890 | 0.001280 | 0.000669 | 0.004536 | -0.000537 | -0.001068 | 0.000089 | 0.000029 | -0.001353 | 0.001674 | 0.000778 | 0.001395 | -0.002529 | 0.002843 | -0.003383 | 0.001827 | 0.001227 | -0.001833 | 0.002184 | -0.002395 | 0.001045 | -0.000097 | -0.000037 | 0.002985 | 0.001353 | -0.000714 | -0.002099 | 0.002637 | -0.002361 |
| geo_level_1_id | -0.002850 | 1.000000 | -0.061405 | 0.002718 | -0.089364 | -0.003908 | 0.071158 | -0.063474 | -0.018245 | -0.152038 | 0.007677 | 0.023656 | -0.028854 | 0.134121 | 0.144985 | 0.074169 | 0.016454 | 0.015767 | 0.022670 | 0.035108 | -0.010428 | -0.019786 | 0.001911 | 0.023523 | 0.003700 | 0.002977 | 0.002655 | -0.002303 | 0.001060 | 0.000523 | -0.017992 | -0.072347 | -0.028660 | -0.008616 | 0.030337 | 0.012918 | 0.003362 | -0.188061 | 0.077189 | 0.212048 | -0.027454 | 0.002146 | 0.048272 | -0.075405 | 0.009872 | 0.110563 | -0.009462 | -0.001048 | 0.112915 | -0.130033 | 0.020317 | 0.047570 | -0.004851 | -0.002604 | 0.024287 | -0.023791 | -0.001566 | 0.004566 | -0.030270 | 0.001674 | 0.004048 | 0.000822 | 0.008179 | 0.009383 | 0.008559 | 0.033052 | 0.100393 | 0.007874 | -0.082055 | 0.004646 |
| geo_level_2_id | 0.000347 | -0.061405 | 1.000000 | 0.000921 | 0.047730 | 0.012594 | -0.049443 | 0.035516 | 0.015833 | 0.076491 | -0.008743 | -0.009122 | 0.014246 | -0.079875 | -0.023927 | -0.032588 | -0.038173 | -0.036074 | -0.020144 | -0.012439 | -0.001519 | 0.017522 | -0.008439 | -0.030704 | -0.004840 | -0.004856 | 0.000687 | -0.000757 | -0.000152 | 0.001926 | -0.013068 | 0.043161 | 0.033524 | 0.005193 | -0.033191 | -0.036710 | -0.044893 | 0.097980 | -0.062514 | -0.042831 | 0.082802 | -0.051035 | -0.067152 | 0.099327 | 0.003788 | -0.078279 | -0.055428 | -0.006407 | -0.040881 | 0.070900 | -0.050574 | -0.023676 | -0.006580 | 0.004049 | -0.005419 | 0.008971 | -0.000613 | -0.006873 | 0.022083 | 0.000590 | -0.007971 | 0.000415 | -0.005786 | -0.004326 | -0.005928 | -0.025532 | -0.025893 | -0.027011 | 0.023189 | 0.013579 |
| geo_level_3_id | -0.000393 | 0.002718 | 0.000921 | 1.000000 | -0.021646 | -0.006385 | -0.005643 | -0.024507 | -0.015732 | 0.026294 | 0.019579 | 0.005966 | -0.023278 | -0.005116 | -0.003916 | 0.006143 | 0.002265 | -0.012420 | -0.010604 | -0.002479 | 0.007375 | 0.015995 | -0.002001 | -0.007356 | -0.007058 | -0.004373 | -0.000862 | -0.002632 | -0.000943 | 0.000269 | -0.002463 | 0.007932 | 0.012013 | 0.014912 | -0.018025 | 0.008930 | -0.010494 | 0.001456 | -0.007069 | 0.010621 | 0.002052 | 0.006104 | -0.014635 | 0.008916 | -0.002068 | -0.004325 | -0.006870 | -0.002622 | 0.009160 | 0.000706 | -0.012337 | -0.002812 | -0.015732 | -0.019633 | 0.017344 | -0.005199 | -0.004617 | 0.003044 | -0.001975 | -0.001072 | -0.005090 | 0.003123 | -0.002804 | 0.001895 | -0.003399 | 0.003240 | 0.034199 | -0.001309 | -0.018533 | -0.013100 |
| count_floors_pre_eq | -0.000654 | -0.089364 | 0.047730 | -0.021646 | 1.000000 | 0.086668 | 0.101071 | 0.772734 | 0.174852 | -0.027116 | -0.041058 | -0.011329 | 0.257279 | -0.085821 | -0.056637 | -0.070487 | 0.007448 | 0.049000 | -0.029888 | 0.086586 | 0.052125 | -0.005294 | 0.077120 | 0.035425 | 0.016384 | 0.008833 | -0.002611 | 0.006786 | 0.009639 | 0.003939 | -0.002073 | 0.122308 | -0.038525 | -0.023156 | 0.046218 | -0.040922 | 0.040634 | 0.131692 | -0.095874 | -0.133956 | -0.099278 | 0.085249 | 0.038167 | 0.057356 | -0.004770 | -0.070247 | 0.001164 | -0.037901 | -0.658161 | 0.353823 | 0.172380 | 0.081222 | 0.246396 | 0.219764 | -0.229465 | 0.056249 | -0.009958 | 0.017159 | 0.025091 | 0.003528 | 0.015891 | 0.002651 | 0.001148 | -0.033170 | -0.003314 | -0.004863 | -0.096866 | -0.008236 | 0.073273 | 0.007046 |
| age | -0.001476 | -0.003908 | 0.012594 | -0.006385 | 0.086668 | 1.000000 | -0.004323 | 0.061074 | 0.068032 | 0.001321 | 0.009129 | -0.012128 | 0.079525 | -0.036992 | 0.005855 | -0.008374 | -0.025431 | -0.025552 | -0.001520 | 0.005309 | -0.008788 | -0.002194 | -0.010021 | 0.001193 | -0.004189 | -0.003514 | -0.003658 | -0.002169 | -0.001764 | -0.001195 | -0.004534 | 0.029273 | -0.012087 | -0.015637 | 0.018434 | -0.004960 | -0.048860 | 0.072640 | -0.020131 | -0.051184 | 0.022504 | -0.001465 | -0.040087 | 0.043668 | -0.003589 | -0.052342 | -0.006343 | 0.000024 | -0.051634 | 0.054615 | -0.031045 | -0.003244 | 0.056348 | 0.069492 | -0.049696 | 0.004850 | 0.008640 | -0.000567 | 0.001872 | -0.001128 | 0.010677 | -0.001462 | -0.000917 | -0.001676 | -0.001011 | -0.003630 | -0.023928 | -0.001055 | 0.016697 | 0.003642 |
| area_percentage | -0.002070 | 0.071158 | -0.049443 | -0.005643 | 0.101071 | -0.004323 | 1.000000 | 0.196645 | 0.026287 | -0.225541 | -0.000473 | 0.073701 | 0.053362 | 0.210800 | -0.053965 | -0.031641 | 0.185047 | 0.224647 | 0.007928 | 0.088630 | 0.122401 | -0.016478 | 0.159885 | 0.105983 | 0.052212 | 0.050164 | 0.019421 | 0.015109 | 0.015290 | 0.004983 | 0.013111 | -0.125221 | -0.028422 | 0.000541 | 0.025817 | -0.008922 | 0.301688 | -0.198506 | 0.126860 | -0.064849 | -0.148760 | -0.043030 | 0.357777 | -0.258259 | 0.009583 | 0.328150 | 0.020847 | -0.000538 | -0.037512 | -0.129557 | 0.317857 | 0.024702 | 0.070492 | -0.010139 | -0.047300 | 0.013992 | 0.007714 | 0.047805 | -0.089410 | 0.002054 | 0.022963 | 0.003568 | 0.035633 | 0.016648 | 0.019164 | 0.096170 | -0.028249 | -0.015343 | 0.029261 | -0.003113 |
| height_percentage | 0.000096 | -0.063474 | 0.035516 | -0.024507 | 0.772734 | 0.061074 | 0.196645 | 1.000000 | 0.149725 | -0.106573 | -0.020260 | 0.012960 | 0.209098 | 0.001698 | -0.052402 | -0.063342 | 0.086228 | 0.129698 | -0.019266 | 0.064316 | 0.091780 | -0.005390 | 0.123551 | 0.068909 | 0.031366 | 0.020032 | 0.001946 | 0.011192 | 0.014660 | 0.004048 | 0.005397 | 0.048130 | -0.019977 | -0.012589 | 0.024240 | -0.027473 | 0.163201 | 0.008661 | -0.036728 | -0.106852 | -0.126140 | 0.039480 | 0.169657 | -0.052502 | 0.000424 | 0.062001 | 0.014229 | -0.024187 | -0.531358 | 0.202727 | 0.305874 | 0.078891 | 0.229007 | 0.157560 | -0.216107 | 0.067331 | -0.004534 | 0.028838 | 0.001975 | 0.003580 | 0.014457 | 0.002732 | 0.006637 | -0.016496 | -0.000126 | 0.006197 | -0.078852 | -0.008693 | 0.063050 | 0.000837 |
| has_superstructure_adobe_mud | -0.000307 | -0.018245 | 0.015833 | -0.015732 | 0.174852 | 0.068032 | 0.026287 | 0.149725 | 1.000000 | -0.306861 | 0.006962 | -0.014554 | 0.314979 | -0.037549 | 0.011694 | 0.011528 | -0.036830 | -0.036782 | 0.057545 | 0.033068 | -0.013300 | -0.003917 | -0.012642 | -0.003935 | -0.004281 | -0.002369 | 0.001762 | -0.003292 | -0.002648 | -0.001493 | -0.010074 | 0.055314 | -0.005327 | -0.019600 | 0.014095 | 0.017915 | -0.061759 | 0.098101 | -0.051293 | -0.057067 | 0.067185 | -0.042600 | -0.052391 | 0.061781 | 0.005500 | -0.078740 | -0.002399 | -0.016562 | -0.067859 | 0.043799 | -0.050588 | 0.037400 | 0.134657 | 0.111681 | -0.149165 | 0.059616 | -0.004491 | -0.004518 | -0.023631 | 0.001543 | 0.005002 | -0.001530 | -0.002785 | 0.017312 | 0.005313 | 0.019020 | -0.030176 | 0.025108 | -0.004681 | 0.033165 |
| has_superstructure_mud_mortar_stone | 0.002423 | -0.152038 | 0.076491 | 0.026294 | -0.027116 | 0.001321 | -0.225541 | -0.106573 | -0.306861 | 1.000000 | -0.034046 | -0.104288 | -0.376208 | -0.470715 | -0.040177 | -0.054657 | -0.222366 | -0.224509 | -0.041705 | 0.001576 | -0.087520 | 0.058261 | -0.159532 | -0.117948 | -0.036064 | -0.023070 | -0.025507 | -0.008763 | -0.011904 | -0.003380 | 0.005628 | 0.291325 | 0.074832 | 0.023414 | -0.079643 | -0.107303 | -0.359323 | 0.538937 | -0.260836 | -0.251562 | 0.184909 | 0.049611 | -0.437898 | 0.382414 | -0.032287 | -0.487756 | -0.012591 | -0.064281 | -0.206889 | 0.325957 | -0.366995 | -0.014855 | -0.225523 | -0.143059 | 0.199138 | -0.053987 | -0.015076 | -0.028242 | 0.110799 | 0.000233 | -0.013597 | 0.000035 | -0.020852 | -0.050911 | -0.017476 | -0.098556 | -0.138746 | -0.047872 | 0.126641 | -0.003726 |
| has_superstructure_stone_flag | 0.001443 | 0.007677 | -0.008743 | 0.019579 | -0.041058 | 0.009129 | -0.000473 | -0.020260 | 0.006962 | -0.034046 | 1.000000 | 0.037307 | -0.033348 | -0.043729 | 0.125311 | 0.078466 | 0.008448 | -0.021406 | 0.065732 | 0.004564 | -0.001738 | 0.010563 | -0.008868 | -0.011632 | -0.000971 | -0.001362 | -0.004240 | -0.002586 | 0.002959 | 0.000472 | 0.002127 | 0.066039 | 0.041656 | 0.016846 | -0.046125 | 0.138152 | -0.034514 | 0.014212 | -0.015621 | -0.021820 | 0.020894 | 0.001883 | -0.042929 | -0.005933 | 0.006001 | -0.044935 | 0.026530 | 0.119927 | 0.008790 | -0.086745 | -0.035450 | 0.123562 | 0.002012 | -0.008076 | -0.016222 | 0.019112 | -0.002476 | -0.004871 | -0.003052 | -0.001733 | 0.000667 | -0.000532 | 0.005583 | 0.011763 | 0.003543 | -0.009556 | -0.008384 | -0.004939 | 0.005545 | 0.005245 |
| has_superstructure_cement_mortar_stone | -0.000827 | 0.023656 | -0.009122 | 0.005966 | -0.011329 | -0.012128 | 0.073701 | 0.012960 | -0.014554 | -0.104288 | 0.037307 | 1.000000 | -0.001692 | 0.078739 | 0.014430 | -0.004008 | 0.076355 | 0.025168 | 0.012462 | -0.006356 | 0.042156 | -0.016117 | 0.072104 | 0.033757 | 0.007985 | 0.006473 | 0.006939 | 0.004406 | 0.012605 | 0.004825 | 0.014337 | -0.060295 | 0.009792 | 0.008380 | -0.012919 | -0.002471 | 0.064682 | -0.142311 | 0.189400 | -0.015541 | -0.015550 | -0.029896 | 0.082105 | -0.139993 | 0.029088 | 0.134846 | 0.049031 | 0.006802 | 0.023387 | -0.070494 | 0.084153 | 0.021139 | 0.023447 | 0.005011 | -0.025366 | 0.013364 | -0.000549 | 0.005748 | -0.024325 | 0.001869 | 0.000348 | -0.001646 | 0.005924 | 0.012402 | 0.006057 | 0.020615 | 0.009067 | -0.006450 | -0.001186 | -0.005920 |
| has_superstructure_mud_mortar_brick | -0.003768 | -0.028854 | 0.014246 | -0.023278 | 0.257279 | 0.079525 | 0.053362 | 0.209098 | 0.314979 | -0.376208 | -0.033348 | -0.001692 | 1.000000 | 0.030656 | -0.001461 | -0.000267 | -0.028914 | -0.026531 | 0.026302 | 0.031557 | -0.010439 | -0.038678 | 0.025152 | 0.018368 | -0.002334 | -0.000326 | 0.011168 | 0.000733 | -0.003266 | -0.000920 | -0.004890 | 0.014561 | -0.050382 | -0.035672 | 0.062975 | -0.010588 | -0.043979 | 0.071139 | -0.016662 | -0.054542 | 0.023248 | -0.034398 | 0.016471 | -0.029532 | 0.017736 | -0.005787 | 0.045420 | -0.011657 | -0.036999 | 0.021699 | 0.010323 | 0.001873 | 0.258090 | 0.217264 | -0.223701 | 0.043463 | -0.003025 | 0.007701 | -0.031969 | 0.002487 | 0.012453 | -0.000744 | 0.004417 | 0.006362 | 0.001848 | 0.041119 | -0.019224 | 0.027949 | -0.000846 | 0.008238 |
| has_superstructure_cement_mortar_brick | 0.001933 | 0.134121 | -0.079875 | -0.005116 | -0.085821 | -0.036992 | 0.210800 | 0.001698 | -0.037549 | -0.470715 | -0.043729 | 0.078739 | 0.030656 | 1.000000 | -0.058652 | -0.054941 | 0.138915 | 0.121070 | -0.005977 | -0.015371 | 0.076756 | -0.054273 | 0.139541 | 0.109545 | 0.032063 | 0.019858 | 0.026241 | 0.008817 | 0.007396 | 0.005061 | 0.000121 | -0.254131 | -0.054304 | -0.021262 | 0.059802 | 0.005285 | 0.246290 | -0.405372 | 0.432362 | 0.003677 | -0.154182 | -0.072475 | 0.419886 | -0.463885 | 0.033557 | 0.573645 | 0.052841 | -0.011169 | 0.224796 | -0.290010 | 0.356077 | -0.042740 | 0.097038 | 0.053953 | -0.087631 | 0.027350 | 0.010310 | 0.024537 | -0.089713 | -0.002621 | 0.010442 | 0.000168 | 0.015334 | 0.029711 | 0.015960 | 0.094121 | 0.076331 | 0.004875 | -0.052481 | -0.014208 |
| has_superstructure_timber | 0.001967 | 0.144985 | -0.023927 | -0.003916 | -0.056637 | 0.005855 | -0.053965 | -0.052402 | 0.011694 | -0.040177 | 0.125311 | 0.014430 | -0.001461 | -0.058652 | 1.000000 | 0.438311 | -0.027385 | -0.068981 | 0.104372 | -0.001544 | -0.022802 | 0.003303 | -0.027763 | -0.026546 | -0.005306 | -0.003695 | -0.000577 | -0.004812 | -0.004148 | -0.002685 | -0.014453 | -0.069852 | 0.034530 | 0.028878 | -0.045242 | 0.047586 | -0.109812 | -0.126082 | -0.048161 | 0.321598 | 0.000588 | 0.073469 | -0.130435 | 0.065689 | -0.004099 | -0.096547 | 0.001009 | 0.033112 | -0.010066 | -0.042880 | -0.110732 | 0.127464 | -0.027171 | 0.003749 | 0.052212 | -0.043589 | 0.010691 | -0.001714 | -0.001889 | 0.001332 | -0.001809 | 0.000956 | 0.005511 | -0.015985 | 0.011793 | 0.015926 | 0.102886 | 0.011204 | -0.072237 | -0.019786 |
| has_superstructure_bamboo | 0.001266 | 0.074169 | -0.032588 | 0.006143 | -0.070487 | -0.008374 | -0.031641 | -0.063342 | 0.011528 | -0.054657 | 0.078466 | -0.004008 | -0.000267 | -0.054941 | 0.438311 | 1.000000 | 0.020200 | -0.037263 | 0.117216 | 0.000250 | -0.022199 | 0.004472 | -0.030755 | -0.019104 | -0.004861 | -0.003616 | -0.003247 | -0.004180 | -0.002542 | -0.002864 | -0.008561 | -0.063051 | 0.022884 | 0.020904 | -0.030813 | 0.010347 | -0.059702 | -0.136061 | -0.039399 | 0.298215 | -0.023744 | 0.067672 | -0.074099 | 0.078491 | -0.005985 | -0.070580 | -0.034266 | -0.002743 | 0.028976 | -0.009336 | -0.063312 | 0.019728 | -0.033477 | -0.011296 | 0.054739 | -0.038871 | 0.007781 | -0.008042 | -0.007401 | 0.001692 | -0.001979 | -0.001402 | 0.004727 | -0.005543 | 0.013446 | 0.014730 | 0.080457 | 0.032627 | -0.071334 | -0.005400 |
| has_superstructure_rc_non_engineered | 0.002472 | 0.016454 | -0.038173 | 0.002265 | 0.007448 | -0.025431 | 0.185047 | 0.086228 | -0.036830 | -0.222366 | 0.008448 | 0.076355 | -0.028914 | 0.138915 | -0.027385 | 0.020200 | 1.000000 | -0.012324 | -0.018506 | -0.014073 | 0.108347 | -0.023192 | 0.157997 | 0.102838 | 0.036314 | 0.021012 | 0.015176 | 0.008195 | 0.005322 | 0.002064 | 0.000582 | -0.158145 | -0.008702 | 0.008846 | 0.003826 | -0.008352 | 0.499215 | -0.289362 | 0.054450 | -0.019256 | -0.181339 | -0.058043 | 0.445971 | -0.258811 | 0.003173 | 0.362685 | -0.011091 | -0.003914 | 0.062207 | -0.197049 | 0.373658 | -0.015777 | 0.078829 | 0.007997 | -0.071683 | 0.031878 | 0.005666 | 0.029699 | -0.041168 | 0.002199 | 0.002919 | -0.002547 | 0.010949 | 0.019966 | 0.006400 | 0.029206 | -0.001817 | 0.005137 | 0.002867 | -0.006599 |
| has_superstructure_rc_engineered | -0.002259 | 0.015767 | -0.036074 | -0.012420 | 0.049000 | -0.025552 | 0.224647 | 0.129698 | -0.036782 | -0.224509 | -0.021406 | 0.025168 | -0.026531 | 0.121070 | -0.068981 | -0.037263 | -0.012324 | 1.000000 | -0.009843 | -0.014364 | 0.104222 | -0.029545 | 0.139722 | 0.131416 | 0.050225 | 0.025087 | 0.005236 | 0.011700 | 0.031536 | 0.005347 | 0.008110 | -0.179014 | -0.025015 | -0.011165 | 0.028191 | -0.007836 | 0.540876 | -0.283009 | 0.019696 | -0.030715 | -0.182108 | -0.069092 | 0.466875 | -0.251369 | -0.000736 | 0.362155 | -0.019812 | -0.004920 | 0.037306 | -0.165441 | 0.412847 | -0.054640 | 0.081470 | 0.007500 | -0.079576 | 0.039318 | 0.004944 | 0.029456 | -0.034788 | -0.001166 | 0.005249 | 0.003555 | 0.003083 | 0.025159 | 0.003806 | 0.014150 | -0.009482 | -0.000967 | 0.012230 | -0.008668 |
| has_superstructure_other | 0.002171 | 0.022670 | -0.020144 | -0.010604 | -0.029888 | -0.001520 | 0.007928 | -0.019266 | 0.057545 | -0.041705 | 0.065732 | 0.012462 | 0.026302 | -0.005977 | 0.104372 | 0.117216 | -0.018506 | -0.009843 | 1.000000 | 0.000806 | -0.000490 | -0.006100 | 0.007830 | 0.000130 | -0.002753 | 0.000984 | -0.001140 | 0.000612 | -0.001489 | -0.001159 | 0.006201 | -0.030224 | 0.033655 | 0.007977 | -0.034619 | 0.109329 | -0.019929 | -0.032881 | 0.006294 | 0.027355 | 0.017685 | -0.009717 | -0.016424 | -0.002075 | 0.026773 | -0.006970 | 0.004861 | 0.004056 | 0.025537 | -0.028571 | -0.013583 | 0.019910 | -0.002014 | -0.000322 | -0.000245 | 0.001552 | 0.013438 | 0.002801 | -0.016398 | 0.002304 | 0.000739 | 0.001126 | 0.000790 | 0.008796 | 0.003309 | 0.010570 | 0.016991 | 0.009238 | -0.014078 | -0.004734 |
| count_families | -0.000599 | 0.035108 | -0.012439 | -0.002479 | 0.086586 | 0.005309 | 0.088630 | 0.064316 | 0.033068 | 0.001576 | 0.004564 | -0.006356 | 0.031557 | -0.015371 | -0.001544 | 0.000250 | -0.014073 | -0.014364 | 0.000806 | 1.000000 | -0.054875 | -0.040685 | -0.034615 | 0.022703 | -0.022465 | -0.020040 | -0.021461 | -0.010846 | -0.004854 | -0.014284 | -0.016783 | 0.056151 | -0.008840 | -0.000705 | 0.008440 | -0.001081 | -0.026917 | 0.034281 | -0.014280 | -0.016648 | -0.013092 | 0.028033 | -0.024521 | 0.019229 | -0.003711 | -0.028777 | 0.007064 | -0.018193 | -0.055671 | 0.036066 | -0.009483 | 0.012485 | 0.013228 | 0.017473 | -0.005257 | -0.006368 | -0.004118 | 0.001356 | -0.004938 | 0.001351 | -0.000181 | 0.000463 | 0.006891 | -0.006440 | -0.000112 | 0.015422 | -0.009021 | 0.001424 | 0.001646 | 0.008729 |
| has_secondary_use | 0.002606 | -0.010428 | -0.001519 | 0.007375 | 0.052125 | -0.008788 | 0.122401 | 0.091780 | -0.013300 | -0.087520 | -0.001738 | 0.042156 | -0.010439 | 0.076756 | -0.022802 | -0.022199 | 0.108347 | 0.104222 | -0.000490 | -0.054875 | 1.000000 | 0.739059 | 0.525564 | 0.254614 | 0.086429 | 0.053520 | 0.092237 | 0.038638 | 0.034025 | 0.026470 | 0.202099 | -0.079630 | -0.005813 | 0.007868 | 0.001636 | 0.003111 | 0.167006 | -0.106781 | 0.035894 | -0.009915 | -0.029727 | -0.056887 | 0.156494 | -0.113672 | 0.013845 | 0.150721 | -0.004277 | 0.027053 | -0.015683 | -0.052286 | 0.181789 | -0.019627 | 0.100408 | 0.004521 | -0.099449 | 0.051205 | 0.006191 | 0.023331 | -0.015952 | -0.000611 | 0.008113 | -0.000253 | 0.006020 | -0.000735 | 0.003774 | 0.015722 | 0.017540 | 0.006853 | -0.024677 | 0.016117 |
| has_secondary_use_agriculture | 0.002631 | -0.019786 | 0.017522 | 0.015995 | -0.005294 | -0.002194 | -0.016478 | -0.005390 | -0.003917 | 0.058261 | 0.010563 | -0.016117 | -0.038678 | -0.054273 | 0.003303 | 0.004472 | -0.023192 | -0.029545 | -0.006100 | -0.040685 | 0.739059 | 1.000000 | -0.048931 | -0.023705 | -0.008047 | -0.004983 | -0.008587 | -0.003597 | -0.003168 | -0.002464 | 0.085034 | 0.011309 | 0.005458 | 0.003259 | -0.006537 | 0.002899 | -0.043896 | 0.038261 | -0.030320 | 0.005801 | 0.045072 | -0.017783 | -0.054151 | 0.057117 | -0.002022 | -0.061304 | -0.019024 | 0.015235 | -0.033454 | 0.056337 | -0.044810 | -0.015273 | -0.025527 | -0.020781 | 0.007712 | 0.011740 | -0.003131 | -0.003510 | 0.017456 | -0.000708 | -0.003485 | -0.000578 | -0.002049 | -0.014274 | -0.002694 | -0.007840 | -0.006178 | 0.004624 | -0.002232 | 0.009561 |
| has_secondary_use_hotel | 0.001934 | 0.001911 | -0.008439 | -0.002001 | 0.077120 | -0.010021 | 0.159885 | 0.123551 | -0.012642 | -0.159532 | -0.008868 | 0.072104 | 0.025152 | 0.139541 | -0.027763 | -0.030755 | 0.157997 | 0.139722 | 0.007830 | -0.034615 | 0.525564 | -0.048931 | 1.000000 | -0.016857 | -0.005722 | -0.003543 | -0.006107 | -0.002558 | -0.002253 | -0.001753 | 0.003920 | -0.097942 | -0.011890 | 0.007432 | 0.007415 | 0.002666 | 0.241753 | -0.164075 | 0.070899 | -0.017334 | -0.073381 | -0.052694 | 0.231876 | -0.191555 | 0.020230 | 0.243568 | 0.007931 | 0.025166 | 0.009360 | -0.117951 | 0.257442 | -0.001541 | 0.181614 | 0.027696 | -0.152560 | 0.056921 | 0.009949 | 0.034423 | -0.034914 | 0.000603 | 0.016750 | 0.001273 | 0.010908 | 0.012321 | 0.007813 | 0.023424 | 0.038265 | 0.008651 | -0.040682 | 0.015197 |
| has_secondary_use_rental | -0.002152 | 0.023523 | -0.030704 | -0.007356 | 0.035425 | 0.001193 | 0.105983 | 0.068909 | -0.003935 | -0.117948 | -0.011632 | 0.033757 | 0.018368 | 0.109545 | -0.026546 | -0.019104 | 0.102838 | 0.131416 | 0.000130 | 0.022703 | 0.254614 | -0.023705 | -0.016857 | 1.000000 | -0.002772 | -0.001717 | -0.002958 | -0.001239 | -0.001091 | -0.000849 | 0.002516 | -0.083754 | -0.008707 | -0.000819 | 0.008372 | -0.002724 | 0.176422 | -0.117407 | 0.047713 | -0.010889 | -0.069152 | -0.032026 | 0.187479 | -0.127559 | 0.006682 | 0.159435 | 0.011871 | 0.003363 | 0.010498 | -0.077937 | 0.193737 | -0.018486 | 0.046600 | 0.015044 | -0.053352 | 0.028575 | 0.010962 | 0.012575 | -0.022548 | -0.000830 | 0.002022 | -0.001091 | 0.001234 | 0.006997 | 0.002583 | 0.021292 | 0.003079 | -0.005100 | -0.000846 | 0.000983 |
| has_secondary_use_institution | 0.000706 | 0.003700 | -0.004840 | -0.007058 | 0.016384 | -0.004189 | 0.052212 | 0.031366 | -0.004281 | -0.036064 | -0.000971 | 0.007985 | -0.002334 | 0.032063 | -0.005306 | -0.004861 | 0.036314 | 0.050225 | -0.002753 | -0.022465 | 0.086429 | -0.008047 | -0.005722 | -0.002772 | 1.000000 | -0.000583 | -0.001004 | -0.000421 | -0.000370 | -0.000288 | 0.004818 | -0.028728 | -0.004525 | 0.002267 | 0.003086 | 0.002760 | 0.065383 | -0.041811 | 0.014641 | -0.004935 | -0.026242 | -0.008810 | 0.065260 | -0.042949 | 0.001483 | 0.059481 | -0.001870 | 0.002135 | -0.002247 | -0.027916 | 0.074409 | -0.003644 | 0.013956 | -0.002916 | -0.011100 | 0.004954 | -0.000954 | 0.009559 | -0.010239 | -0.000282 | -0.000408 | -0.000370 | -0.000758 | 0.005695 | 0.002320 | 0.007000 | -0.002769 | -0.000643 | 0.002706 | -0.000642 |
| has_secondary_use_school | -0.000362 | 0.002977 | -0.004856 | -0.004373 | 0.008833 | -0.003514 | 0.050164 | 0.020032 | -0.002369 | -0.023070 | -0.001362 | 0.006473 | -0.000326 | 0.019858 | -0.003695 | -0.003616 | 0.021012 | 0.025087 | 0.000984 | -0.020040 | 0.053520 | -0.004983 | -0.003543 | -0.001717 | -0.000583 | 1.000000 | -0.000622 | -0.000260 | -0.000229 | -0.000178 | 0.001469 | -0.011692 | -0.004014 | -0.002299 | 0.004762 | -0.001420 | 0.035004 | -0.027123 | 0.014089 | -0.000392 | -0.010579 | -0.006760 | 0.031956 | -0.028839 | -0.000840 | 0.036725 | 0.001394 | 0.005343 | 0.003161 | -0.019559 | 0.043014 | -0.001991 | 0.006624 | -0.001805 | -0.005761 | 0.003012 | -0.000591 | 0.022232 | -0.012523 | -0.000175 | 0.014959 | -0.000229 | 0.015898 | 0.001309 | -0.000693 | 0.008055 | -0.002792 | -0.001432 | -0.001620 | 0.008086 |
| has_secondary_use_industry | 0.002348 | 0.002655 | 0.000687 | -0.000862 | -0.002611 | -0.003658 | 0.019421 | 0.001946 | 0.001762 | -0.025507 | -0.004240 | 0.006939 | 0.011168 | 0.026241 | -0.000577 | -0.003247 | 0.015176 | 0.005236 | -0.001140 | -0.021461 | 0.092237 | -0.008587 | -0.006107 | -0.002958 | -0.001004 | -0.000622 | 1.000000 | -0.000449 | -0.000395 | -0.000308 | 0.004229 | -0.011024 | -0.002406 | -0.000603 | 0.002490 | 0.000710 | 0.012292 | -0.019153 | 0.017404 | 0.002417 | -0.005578 | -0.003017 | 0.015887 | -0.018759 | 0.006534 | 0.024352 | 0.000546 | -0.002036 | 0.015110 | -0.014362 | 0.015727 | -0.004884 | 0.006819 | 0.009346 | -0.008537 | 0.003188 | -0.001019 | 0.005493 | -0.005201 | -0.000301 | -0.000435 | -0.000395 | -0.000809 | 0.002333 | 0.002029 | 0.004088 | 0.004158 | 0.000662 | -0.001649 | -0.003335 |
| has_secondary_use_health_post | -0.000374 | -0.002303 | -0.000757 | -0.002632 | 0.006786 | -0.002169 | 0.015109 | 0.011192 | -0.003292 | -0.008763 | -0.002586 | 0.004406 | 0.000733 | 0.008817 | -0.004812 | -0.004180 | 0.008195 | 0.011700 | 0.000612 | -0.010846 | 0.038638 | -0.003597 | -0.002558 | -0.001239 | -0.000421 | -0.000260 | -0.000449 | 1.000000 | -0.000166 | -0.000129 | -0.000984 | -0.008543 | -0.001370 | 0.000695 | 0.000931 | -0.001025 | 0.017033 | -0.011648 | 0.007776 | -0.003403 | -0.004514 | -0.004993 | 0.017345 | -0.012991 | -0.000606 | 0.018549 | -0.000645 | -0.000853 | -0.001160 | -0.006437 | 0.019659 | -0.001629 | 0.009547 | 0.001668 | -0.008048 | 0.002969 | -0.000427 | -0.000485 | -0.004293 | -0.000126 | -0.000182 | -0.000166 | -0.000339 | 0.005610 | -0.000500 | 0.000748 | 0.001874 | -0.001034 | -0.000271 | -0.001397 |
| has_secondary_use_gov_office | 0.000538 | 0.001060 | -0.000152 | -0.000943 | 0.009639 | -0.001764 | 0.015290 | 0.014660 | -0.002648 | -0.011904 | 0.002959 | 0.012605 | -0.003266 | 0.007396 | -0.004148 | -0.002542 | 0.005322 | 0.031536 | -0.001489 | -0.004854 | 0.034025 | -0.003168 | -0.002253 | -0.001091 | -0.000370 | -0.000229 | -0.000395 | -0.000166 | 1.000000 | -0.000113 | 0.003587 | -0.009378 | -0.000167 | -0.002193 | 0.001184 | -0.000903 | 0.026502 | -0.013007 | -0.000111 | -0.001637 | -0.010183 | -0.001480 | 0.021912 | -0.015675 | -0.000534 | 0.020017 | 0.001484 | -0.000751 | -0.003364 | -0.008644 | 0.026121 | -0.000286 | 0.002981 | -0.001148 | -0.007973 | 0.007495 | -0.000376 | -0.000427 | 0.000864 | -0.000111 | -0.000160 | -0.000146 | -0.000298 | 0.000370 | -0.000440 | -0.001439 | 0.000433 | -0.000910 | 0.000688 | -0.001230 |
| has_secondary_use_use_police | -0.003116 | 0.000523 | 0.001926 | 0.000269 | 0.003939 | -0.001195 | 0.004983 | 0.004048 | -0.001493 | -0.003380 | 0.000472 | 0.004825 | -0.000920 | 0.005061 | -0.002685 | -0.002864 | 0.002064 | 0.005347 | -0.001159 | -0.014284 | 0.026470 | -0.002464 | -0.001753 | -0.000849 | -0.000288 | -0.000178 | -0.000308 | -0.000129 | -0.000113 | 1.000000 | -0.000674 | -0.001656 | -0.002542 | -0.001706 | 0.003133 | -0.000702 | 0.008417 | -0.004856 | 0.003128 | -0.002331 | -0.006372 | 0.002467 | 0.007738 | -0.002575 | -0.000415 | 0.006748 | -0.003052 | -0.000584 | -0.000586 | -0.003042 | 0.005721 | 0.001277 | 0.001537 | -0.000893 | 0.000161 | -0.000866 | -0.000292 | -0.000332 | -0.006370 | -0.000086 | -0.000125 | -0.000113 | -0.000232 | 0.006980 | 0.010875 | -0.001120 | -0.001381 | -0.000708 | 0.001844 | -0.000957 |
| has_secondary_use_other | -0.002295 | -0.017992 | -0.013068 | -0.002463 | -0.002073 | -0.004534 | 0.013111 | 0.005397 | -0.010074 | 0.005628 | 0.002127 | 0.014337 | -0.004890 | 0.000121 | -0.014453 | -0.008561 | 0.000582 | 0.008110 | 0.006201 | -0.016783 | 0.202099 | 0.085034 | 0.003920 | 0.002516 | 0.004818 | 0.001469 | 0.004229 | -0.000984 | 0.003587 | -0.000674 | 1.000000 | -0.016334 | 0.008326 | 0.014511 | -0.014455 | -0.002468 | 0.009222 | -0.002802 | 0.007803 | -0.010210 | 0.001533 | -0.006735 | 0.008948 | -0.035653 | 0.009021 | 0.012344 | 0.034888 | -0.001857 | -0.003280 | -0.015414 | 0.010105 | 0.017397 | 0.002447 | 0.000033 | -0.008439 | 0.008036 | 0.001228 | -0.001011 | 0.001884 | -0.000659 | -0.000953 | -0.000866 | 0.000405 | -0.003361 | -0.001139 | 0.001520 | -0.002696 | -0.001822 | -0.005561 | 0.015623 |
| damage_grade | 0.001063 | -0.072347 | 0.043161 | 0.007932 | 0.122308 | 0.029273 | -0.125221 | 0.048130 | 0.055314 | 0.291325 | 0.066039 | -0.060295 | 0.014561 | -0.254131 | -0.069852 | -0.063051 | -0.158145 | -0.179014 | -0.030224 | 0.056151 | -0.079630 | 0.011309 | -0.097942 | -0.083754 | -0.028728 | -0.011692 | -0.011024 | -0.008543 | -0.009378 | -0.001656 | -0.016334 | 1.000000 | 0.008530 | 0.015078 | -0.014909 | -0.016202 | -0.263901 | 0.343355 | -0.139452 | -0.173328 | 0.078612 | 0.074460 | -0.280106 | 0.234755 | -0.019345 | -0.318720 | 0.006606 | -0.016728 | -0.135668 | 0.166759 | -0.226820 | 0.043188 | -0.032275 | -0.004787 | -0.027769 | 0.051605 | -0.019583 | -0.023181 | 0.042804 | 0.000517 | -0.007063 | -0.001587 | -0.013941 | 0.008099 | -0.011272 | -0.062317 | -0.082705 | -0.002591 | 0.048238 | 0.029555 |
| land_surface_condition_n | 0.003831 | -0.028660 | 0.033524 | 0.012013 | -0.038525 | -0.012087 | -0.028422 | -0.019977 | -0.005327 | 0.074832 | 0.041656 | 0.009792 | -0.050382 | -0.054304 | 0.034530 | 0.022884 | -0.008702 | -0.025015 | 0.033655 | -0.008840 | -0.005813 | 0.005458 | -0.011890 | -0.008707 | -0.004525 | -0.004014 | -0.002406 | -0.001370 | -0.000167 | -0.002542 | 0.008326 | 0.008530 | 1.000000 | -0.072133 | -0.883395 | -0.004123 | -0.028120 | 0.026638 | -0.020410 | 0.003250 | 0.043577 | -0.023711 | -0.040881 | 0.028090 | 0.005259 | -0.050064 | 0.005385 | 0.027100 | -0.001642 | -0.018646 | -0.029515 | 0.042295 | -0.030546 | -0.022565 | 0.024974 | -0.004254 | 0.014624 | -0.001048 | -0.001225 | 0.002435 | -0.000228 | -0.002946 | 0.000599 | 0.012777 | -0.002509 | -0.016605 | 0.001053 | 0.008232 | 0.001020 | -0.009533 |
| land_surface_condition_o | -0.001509 | -0.008616 | 0.005193 | 0.014912 | -0.023156 | -0.015637 | 0.000541 | -0.012589 | -0.019600 | 0.023414 | 0.016846 | 0.008380 | -0.035672 | -0.021262 | 0.028878 | 0.020904 | 0.008846 | -0.011165 | 0.007977 | -0.000705 | 0.007868 | 0.003259 | 0.007432 | -0.000819 | 0.002267 | -0.002299 | -0.000603 | 0.000695 | -0.002193 | -0.001706 | 0.014511 | 0.015078 | -0.072133 | 1.000000 | -0.403685 | -0.003292 | -0.005485 | -0.002253 | -0.014404 | 0.023215 | 0.015381 | -0.007089 | -0.016683 | 0.006212 | -0.002579 | -0.023284 | 0.014648 | 0.002453 | 0.001491 | -0.008444 | -0.006433 | 0.013093 | -0.016959 | -0.012620 | 0.017535 | -0.006467 | 0.006293 | 0.002245 | -0.001997 | 0.000708 | -0.000769 | -0.000384 | -0.003602 | 0.006328 | 0.009568 | -0.009001 | 0.025052 | 0.007279 | -0.019143 | -0.005289 |
| land_surface_condition_t | -0.002806 | 0.030337 | -0.033191 | -0.018025 | 0.046218 | 0.018434 | 0.025817 | 0.024240 | 0.014095 | -0.079643 | -0.046125 | -0.012919 | 0.062975 | 0.059802 | -0.045242 | -0.030813 | 0.003826 | 0.028191 | -0.034619 | 0.008440 | 0.001636 | -0.006537 | 0.007415 | 0.008372 | 0.003086 | 0.004762 | 0.002490 | 0.000931 | 0.001184 | 0.003133 | -0.014455 | -0.014909 | -0.883395 | -0.403685 | 1.000000 | 0.005329 | 0.028371 | -0.023376 | 0.025489 | -0.013889 | -0.047200 | 0.025081 | 0.045338 | -0.028686 | -0.003612 | 0.056864 | -0.011822 | -0.026011 | 0.000806 | 0.021071 | 0.030097 | -0.044948 | 0.035988 | 0.026628 | -0.031147 | 0.006941 | -0.016371 | -0.000093 | 0.002062 | -0.002566 | 0.000571 | 0.002883 | 0.001143 | -0.014693 | -0.002194 | 0.019461 | -0.012736 | -0.010971 | 0.008059 | 0.011230 |
| foundation_type_h | -0.000571 | 0.012918 | -0.036710 | 0.008930 | -0.040922 | -0.004960 | -0.008922 | -0.027473 | 0.017915 | -0.107303 | 0.138152 | -0.002471 | -0.010588 | 0.005285 | 0.047586 | 0.010347 | -0.008352 | -0.007836 | 0.109329 | -0.001081 | 0.003111 | 0.002899 | 0.002666 | -0.002724 | 0.002760 | -0.001420 | 0.000710 | -0.001025 | -0.000903 | -0.000702 | -0.002468 | -0.016202 | -0.004123 | -0.003292 | 0.005329 | 1.000000 | -0.015376 | -0.171987 | -0.017984 | -0.018550 | -0.000445 | 0.007153 | -0.011747 | -0.013238 | 0.028296 | -0.000821 | 0.001366 | 0.062015 | 0.044556 | -0.065098 | -0.008078 | 0.045647 | 0.015539 | -0.004912 | -0.011492 | 0.004963 | 0.012622 | -0.002641 | -0.026348 | -0.000687 | 0.006778 | -0.000903 | -0.001847 | 0.016378 | 0.004363 | 0.019649 | 0.034576 | 0.004004 | -0.053635 | 0.048189 |
| foundation_type_i | -0.000334 | 0.003362 | -0.044893 | -0.010494 | 0.040634 | -0.048860 | 0.301688 | 0.163201 | -0.061759 | -0.359323 | -0.034514 | 0.064682 | -0.043979 | 0.246290 | -0.109812 | -0.059702 | 0.499215 | 0.540876 | -0.019929 | -0.026917 | 0.167006 | -0.043896 | 0.241753 | 0.176422 | 0.065383 | 0.035004 | 0.012292 | 0.017033 | 0.026502 | 0.008417 | 0.009222 | -0.263901 | -0.028120 | -0.005485 | 0.028371 | -0.015376 | 1.000000 | -0.473285 | -0.049491 | -0.051047 | -0.278241 | -0.108694 | 0.718842 | -0.406708 | 0.002371 | 0.586806 | -0.033341 | -0.008398 | 0.082859 | -0.264690 | 0.617345 | -0.085489 | 0.123052 | 0.009350 | -0.119468 | 0.059076 | 0.009240 | 0.045622 | -0.056641 | -0.001890 | 0.006049 | 0.002347 | 0.007516 | 0.034844 | 0.006383 | 0.031089 | 0.002195 | 0.006537 | -0.000286 | -0.007457 |
| foundation_type_r | 0.002724 | -0.188061 | 0.097980 | 0.001456 | 0.131692 | 0.072640 | -0.198506 | 0.008661 | 0.098101 | 0.538937 | 0.014212 | -0.142311 | 0.071139 | -0.405372 | -0.126082 | -0.136061 | -0.289362 | -0.283009 | -0.032881 | 0.034281 | -0.106781 | 0.038261 | -0.164075 | -0.117407 | -0.041811 | -0.027123 | -0.019153 | -0.011648 | -0.013007 | -0.004856 | -0.002802 | 0.343355 | 0.026638 | -0.002253 | -0.023376 | -0.171987 | -0.473285 | 1.000000 | -0.553581 | -0.570987 | 0.260969 | -0.006366 | -0.483566 | 0.385112 | -0.023390 | -0.514300 | 0.005984 | -0.050744 | -0.202062 | 0.325542 | -0.415477 | 0.008312 | -0.067188 | 0.004533 | 0.038127 | -0.004205 | -0.023290 | -0.026876 | 0.087701 | 0.001709 | -0.010028 | -0.003445 | -0.021993 | -0.032721 | -0.020475 | -0.079708 | -0.184511 | -0.036539 | 0.148767 | 0.011694 |
| foundation_type_u | -0.001303 | 0.077189 | -0.062514 | -0.007069 | -0.095874 | -0.020131 | 0.126860 | -0.036728 | -0.051293 | -0.260836 | -0.015621 | 0.189400 | -0.016662 | 0.432362 | -0.048161 | -0.039399 | 0.054450 | 0.019696 | 0.006294 | -0.014280 | 0.035894 | -0.030320 | 0.070899 | 0.047713 | 0.014641 | 0.014089 | 0.017404 | 0.007776 | -0.000111 | 0.003128 | 0.007803 | -0.139452 | -0.020410 | -0.014404 | 0.025489 | -0.017984 | -0.049491 | -0.553581 | 1.000000 | -0.059707 | -0.083819 | -0.036398 | 0.222982 | -0.306078 | 0.026089 | 0.338507 | 0.073193 | -0.003251 | 0.149500 | -0.168047 | 0.190886 | -0.034650 | 0.037070 | 0.006151 | -0.038385 | 0.019644 | 0.006086 | 0.010621 | -0.053714 | -0.000374 | 0.006964 | 0.002684 | 0.011137 | 0.025341 | 0.005591 | 0.048582 | 0.035575 | 0.005716 | -0.024762 | -0.008615 |
| foundation_type_w | -0.002529 | 0.212048 | -0.042831 | 0.010621 | -0.133956 | -0.051184 | -0.064849 | -0.106852 | -0.057067 | -0.251562 | -0.021820 | -0.015541 | -0.054542 | 0.003677 | 0.321598 | 0.298215 | -0.019256 | -0.030715 | 0.027355 | -0.016648 | -0.009915 | 0.005801 | -0.017334 | -0.010889 | -0.004935 | -0.000392 | 0.002417 | -0.003403 | -0.001637 | -0.002331 | -0.010210 | -0.173328 | 0.003250 | 0.023215 | -0.013889 | -0.018550 | -0.051047 | -0.570987 | -0.059707 | 1.000000 | -0.091521 | 0.134854 | -0.063856 | 0.043101 | 0.000197 | -0.020197 | -0.052855 | 0.069888 | 0.086419 | -0.101438 | -0.054589 | 0.078369 | -0.039820 | -0.019404 | 0.082233 | -0.063987 | 0.018686 | -0.005979 | -0.028694 | -0.000494 | 0.001645 | 0.001082 | 0.017800 | -0.008109 | 0.019803 | 0.044888 | 0.241083 | 0.044787 | -0.191257 | -0.018933 |
| roof_type_n | -0.001501 | -0.027454 | 0.082802 | 0.002052 | -0.099278 | 0.022504 | -0.148760 | -0.126140 | 0.067185 | 0.184909 | 0.020894 | -0.015550 | 0.023248 | -0.154182 | 0.000588 | -0.023744 | -0.181339 | -0.182108 | 0.017685 | -0.013092 | -0.029727 | 0.045072 | -0.073381 | -0.069152 | -0.026242 | -0.010579 | -0.005578 | -0.004514 | -0.010183 | -0.006372 | 0.001533 | 0.078612 | 0.043577 | 0.015381 | -0.047200 | -0.000445 | -0.278241 | 0.260969 | -0.083819 | -0.091521 | 1.000000 | -0.852932 | -0.394571 | 0.189658 | 0.000871 | -0.253680 | -0.005797 | 0.009555 | 0.045850 | 0.051269 | -0.273719 | 0.043565 | -0.044191 | 0.006333 | 0.043078 | -0.023873 | -0.000758 | -0.012838 | 0.037325 | 0.001428 | -0.003961 | -0.003932 | -0.011397 | -0.004166 | -0.012382 | -0.046337 | -0.019719 | -0.011800 | 0.006037 | 0.025599 |
| roof_type_q | 0.000890 | 0.002146 | -0.051035 | 0.006104 | 0.085249 | -0.001465 | -0.043030 | 0.039480 | -0.042600 | 0.049611 | 0.001883 | -0.029896 | -0.034398 | -0.072475 | 0.073469 | 0.067672 | -0.058043 | -0.069092 | -0.009717 | 0.028033 | -0.056887 | -0.017783 | -0.052694 | -0.032026 | -0.008810 | -0.006760 | -0.003017 | -0.004993 | -0.001480 | 0.002467 | -0.006735 | 0.074460 | -0.023711 | -0.007089 | 0.025081 | 0.007153 | -0.108694 | -0.006366 | -0.036398 | 0.134854 | -0.852932 | 1.000000 | -0.143125 | 0.072146 | -0.011066 | -0.103696 | 0.008145 | -0.003387 | -0.113587 | 0.122346 | -0.107760 | 0.012237 | -0.037965 | -0.022369 | 0.036829 | -0.013244 | -0.005099 | -0.010185 | 0.002562 | -0.000195 | -0.003991 | 0.003756 | 0.006376 | -0.020575 | 0.009240 | 0.021587 | 0.028351 | 0.005416 | -0.011570 | -0.022803 |
| roof_type_x | 0.001280 | 0.048272 | -0.067152 | -0.014635 | 0.038167 | -0.040087 | 0.357777 | 0.169657 | -0.052391 | -0.437898 | -0.042929 | 0.082105 | 0.016471 | 0.419886 | -0.130435 | -0.074099 | 0.445971 | 0.466875 | -0.016424 | -0.024521 | 0.156494 | -0.054151 | 0.231876 | 0.187479 | 0.065260 | 0.031956 | 0.015887 | 0.017345 | 0.021912 | 0.007738 | 0.008948 | -0.280106 | -0.040881 | -0.016683 | 0.045338 | -0.011747 | 0.718842 | -0.483566 | 0.222982 | -0.063856 | -0.394571 | -0.143125 | 1.000000 | -0.486566 | 0.017827 | 0.663480 | -0.003346 | -0.012152 | 0.113009 | -0.312555 | 0.708627 | -0.104134 | 0.150609 | 0.027367 | -0.146498 | 0.068571 | 0.010411 | 0.042268 | -0.075274 | -0.002364 | 0.014534 | 0.000843 | 0.010384 | 0.044115 | 0.007211 | 0.049853 | -0.012519 | 0.012838 | 0.008920 | -0.008395 |
| ground_floor_type_f | 0.000669 | -0.075405 | 0.099327 | 0.008916 | 0.057356 | 0.043668 | -0.258259 | -0.052502 | 0.061781 | 0.382414 | -0.005933 | -0.139993 | -0.029532 | -0.463885 | 0.065689 | 0.078491 | -0.258811 | -0.251369 | -0.002075 | 0.019229 | -0.113672 | 0.057117 | -0.191555 | -0.127559 | -0.042949 | -0.028839 | -0.018759 | -0.012991 | -0.015675 | -0.002575 | -0.035653 | 0.234755 | 0.028090 | 0.006212 | -0.028686 | -0.013238 | -0.406708 | 0.385112 | -0.306078 | 0.043101 | 0.189658 | 0.072146 | -0.486566 | 1.000000 | -0.089614 | -0.654560 | -0.658725 | -0.126103 | -0.166582 | 0.285625 | -0.415431 | 0.025611 | -0.103564 | -0.016903 | 0.092993 | -0.038924 | -0.014846 | -0.029443 | 0.083084 | 0.002426 | -0.013836 | -0.002056 | -0.016803 | -0.037621 | -0.008318 | -0.071965 | -0.035542 | -0.001527 | 0.022209 | 0.010237 |
| ground_floor_type_m | 0.004536 | 0.009872 | 0.003788 | -0.002068 | -0.004770 | -0.003589 | 0.009583 | 0.000424 | 0.005500 | -0.032287 | 0.006001 | 0.029088 | 0.017736 | 0.033557 | -0.004099 | -0.005985 | 0.003173 | -0.000736 | 0.026773 | -0.003711 | 0.013845 | -0.002022 | 0.020230 | 0.006682 | 0.001483 | -0.000840 | 0.006534 | -0.000606 | -0.000534 | -0.000415 | 0.009021 | -0.019345 | 0.005259 | -0.002579 | -0.003612 | 0.028296 | 0.002371 | -0.023390 | 0.026089 | 0.000197 | 0.000871 | -0.011066 | 0.017827 | -0.089614 | 1.000000 | -0.014266 | -0.014357 | -0.002748 | 0.014343 | -0.022432 | 0.011010 | 0.008941 | 0.010723 | 0.004112 | -0.008536 | 0.002201 | 0.021017 | -0.001562 | -0.009488 | -0.000406 | -0.000587 | -0.000534 | -0.001092 | 0.005895 | 0.000778 | 0.003618 | 0.002573 | 0.001310 | -0.002839 | 0.000674 |
| ground_floor_type_v | -0.000537 | 0.110563 | -0.078279 | -0.004325 | -0.070247 | -0.052342 | 0.328150 | 0.062001 | -0.078740 | -0.487756 | -0.044935 | 0.134846 | -0.005787 | 0.573645 | -0.096547 | -0.070580 | 0.362685 | 0.362155 | -0.006970 | -0.028777 | 0.150721 | -0.061304 | 0.243568 | 0.159435 | 0.059481 | 0.036725 | 0.024352 | 0.018549 | 0.020017 | 0.006748 | 0.012344 | -0.318720 | -0.050064 | -0.023284 | 0.056864 | -0.000821 | 0.586806 | -0.514300 | 0.338507 | -0.020197 | -0.253680 | -0.103696 | 0.663480 | -0.654560 | -0.014266 | 1.000000 | -0.104867 | -0.020075 | 0.202827 | -0.349559 | 0.563830 | -0.061529 | 0.114804 | 0.005830 | -0.106931 | 0.050755 | 0.008116 | 0.043265 | -0.096085 | -0.002966 | 0.013496 | 0.000450 | 0.019134 | 0.035457 | 0.011660 | 0.092792 | 0.051617 | 0.005251 | -0.036485 | -0.009194 |
| ground_floor_type_x | -0.001068 | -0.009462 | -0.055428 | -0.006870 | 0.001164 | -0.006343 | 0.020847 | 0.014229 | -0.002399 | -0.012591 | 0.026530 | 0.049031 | 0.045420 | 0.052841 | 0.001009 | -0.034266 | -0.011091 | -0.019812 | 0.004861 | 0.007064 | -0.004277 | -0.019024 | 0.007931 | 0.011871 | -0.001870 | 0.001394 | 0.000546 | -0.000645 | 0.001484 | -0.003052 | 0.034888 | 0.006606 | 0.005385 | 0.014648 | -0.011822 | 0.001366 | -0.033341 | 0.005984 | 0.073193 | -0.052855 | -0.005797 | 0.008145 | -0.003346 | -0.658725 | -0.014357 | -0.104867 | 1.000000 | -0.020203 | 0.012177 | -0.023557 | 0.000361 | 0.018479 | 0.022569 | 0.016400 | -0.017855 | 0.002541 | 0.008379 | -0.002969 | -0.012727 | -0.000142 | 0.005513 | 0.001484 | 0.003079 | 0.012565 | -0.001086 | 0.003297 | -0.007276 | -0.004113 | 0.010114 | -0.005510 |
| ground_floor_type_z | 0.000089 | -0.001048 | -0.006407 | -0.002622 | -0.037901 | 0.000024 | -0.000538 | -0.024187 | -0.016562 | -0.064281 | 0.119927 | 0.006802 | -0.011657 | -0.011169 | 0.033112 | -0.002743 | -0.003914 | -0.004920 | 0.004056 | -0.018193 | 0.027053 | 0.015235 | 0.025166 | 0.003363 | 0.002135 | 0.005343 | -0.002036 | -0.000853 | -0.000751 | -0.000584 | -0.001857 | -0.016728 | 0.027100 | 0.002453 | -0.026011 | 0.062015 | -0.008398 | -0.050744 | -0.003251 | 0.069888 | 0.009555 | -0.003387 | -0.012152 | -0.126103 | -0.002748 | -0.020075 | -0.020203 | 1.000000 | 0.041566 | -0.051670 | -0.010137 | 0.032341 | 0.006711 | 0.000008 | -0.000086 | -0.003886 | 0.002051 | -0.000442 | -0.011462 | -0.000571 | -0.000826 | 0.004379 | 0.003480 | 0.009777 | 0.002836 | 0.004714 | 0.016687 | 0.003574 | -0.015990 | 0.003493 |
| other_floor_type_j | 0.000029 | 0.112915 | -0.040881 | 0.009160 | -0.658161 | -0.051634 | -0.037512 | -0.531358 | -0.067859 | -0.206889 | 0.008790 | 0.023387 | -0.036999 | 0.224796 | -0.010066 | 0.028976 | 0.062207 | 0.037306 | 0.025537 | -0.055671 | -0.015683 | -0.033454 | 0.009360 | 0.010498 | -0.002247 | 0.003161 | 0.015110 | -0.001160 | -0.003364 | -0.000586 | -0.003280 | -0.135668 | -0.001642 | 0.001491 | 0.000806 | 0.044556 | 0.082859 | -0.202062 | 0.149500 | 0.086419 | 0.045850 | -0.113587 | 0.113009 | -0.166582 | 0.014343 | 0.202827 | 0.012177 | 0.041566 | 1.000000 | -0.559424 | -0.093452 | -0.190029 | -0.043182 | -0.027925 | 0.063058 | -0.038252 | 0.016286 | -0.001417 | -0.059093 | -0.001582 | -0.000026 | -0.000715 | 0.005048 | 0.049952 | 0.002958 | 0.031227 | 0.099468 | 0.025998 | -0.082096 | -0.007432 |
| other_floor_type_q | -0.001353 | -0.130033 | 0.070900 | 0.000706 | 0.353823 | 0.054615 | -0.129557 | 0.202727 | 0.043799 | 0.325957 | -0.086745 | -0.070494 | 0.021699 | -0.290010 | -0.042880 | -0.009336 | -0.197049 | -0.165441 | -0.028571 | 0.036066 | -0.052286 | 0.056337 | -0.117951 | -0.077937 | -0.027916 | -0.019559 | -0.014362 | -0.006437 | -0.008644 | -0.003042 | -0.015414 | 0.166759 | -0.018646 | -0.008444 | 0.021071 | -0.065098 | -0.264690 | 0.325542 | -0.168047 | -0.101438 | 0.051269 | 0.122346 | -0.312555 | 0.285625 | -0.022432 | -0.349559 | -0.023557 | -0.051670 | -0.559424 | 1.000000 | -0.289663 | -0.589013 | -0.043467 | 0.003581 | 0.013321 | 0.009881 | -0.012772 | -0.017411 | 0.090022 | -0.000827 | -0.007301 | -0.000726 | -0.013823 | -0.062468 | -0.014100 | -0.056300 | -0.087368 | -0.017559 | 0.070879 | 0.004911 |
| other_floor_type_s | 0.001674 | 0.020317 | -0.050574 | -0.012337 | 0.172380 | -0.031045 | 0.317857 | 0.305874 | -0.050588 | -0.366995 | -0.035450 | 0.084153 | 0.010323 | 0.356077 | -0.110732 | -0.063312 | 0.373658 | 0.412847 | -0.013583 | -0.009483 | 0.181789 | -0.044810 | 0.257442 | 0.193737 | 0.074409 | 0.043014 | 0.015727 | 0.019659 | 0.026121 | 0.005721 | 0.010105 | -0.226820 | -0.029515 | -0.006433 | 0.030097 | -0.008078 | 0.617345 | -0.415477 | 0.190886 | -0.054589 | -0.273719 | -0.107760 | 0.708627 | -0.415431 | 0.011010 | 0.563830 | 0.000361 | -0.010137 | -0.093452 | -0.289663 | 1.000000 | -0.098395 | 0.160821 | 0.022002 | -0.153596 | 0.071864 | 0.014927 | 0.052338 | -0.057868 | -0.002021 | 0.013597 | 0.000373 | 0.009377 | 0.025185 | 0.007047 | 0.040403 | -0.011618 | 0.015616 | 0.003963 | -0.002459 |
| other_floor_type_x | 0.000778 | 0.047570 | -0.023676 | -0.002812 | 0.081222 | -0.003244 | 0.024702 | 0.078891 | 0.037400 | -0.014855 | 0.123562 | 0.021139 | 0.001873 | -0.042740 | 0.127464 | 0.019728 | -0.015777 | -0.054640 | 0.019910 | 0.012485 | -0.019627 | -0.015273 | -0.001541 | -0.018486 | -0.003644 | -0.001991 | -0.004884 | -0.001629 | -0.000286 | 0.001277 | 0.017397 | 0.043188 | 0.042295 | 0.013093 | -0.044948 | 0.045647 | -0.085489 | 0.008312 | -0.034650 | 0.078369 | 0.043565 | 0.012237 | -0.104134 | 0.025611 | 0.008941 | -0.061529 | 0.018479 | 0.032341 | -0.190029 | -0.589013 | -0.098395 | 1.000000 | 0.007330 | 0.009950 | 0.008366 | -0.016289 | -0.007623 | -0.005596 | -0.026696 | 0.003734 | 0.001806 | 0.001419 | 0.007709 | 0.018314 | 0.011398 | 0.019857 | 0.023399 | -0.011203 | -0.014555 | 0.002214 |
| position_j | 0.001395 | -0.004851 | -0.006580 | -0.015732 | 0.246396 | 0.056348 | 0.070492 | 0.229007 | 0.134657 | -0.225523 | 0.002012 | 0.023447 | 0.258090 | 0.097038 | -0.027171 | -0.033477 | 0.078829 | 0.081470 | -0.002014 | 0.013228 | 0.100408 | -0.025527 | 0.181614 | 0.046600 | 0.013956 | 0.006624 | 0.006819 | 0.009547 | 0.002981 | 0.001537 | 0.002447 | -0.032275 | -0.030546 | -0.016959 | 0.035988 | 0.015539 | 0.123052 | -0.067188 | 0.037070 | -0.039820 | -0.044191 | -0.037965 | 0.150609 | -0.103564 | 0.010723 | 0.114804 | 0.022569 | 0.006711 | -0.043182 | -0.043467 | 0.160821 | 0.007330 | 1.000000 | -0.022025 | -0.430682 | -0.102867 | 0.001210 | 0.009609 | -0.018730 | 0.001669 | 0.011368 | -0.002799 | 0.002753 | 0.008344 | 0.000175 | 0.016040 | 0.002070 | 0.036522 | -0.025076 | 0.016882 |
| position_o | -0.002529 | -0.002604 | 0.004049 | -0.019633 | 0.219764 | 0.069492 | -0.010139 | 0.157560 | 0.111681 | -0.143059 | -0.008076 | 0.005011 | 0.217264 | 0.053953 | 0.003749 | -0.011296 | 0.007997 | 0.007500 | -0.000322 | 0.017473 | 0.004521 | -0.020781 | 0.027696 | 0.015044 | -0.002916 | -0.001805 | 0.009346 | 0.001668 | -0.001148 | -0.000893 | 0.000033 | -0.004787 | -0.022565 | -0.012620 | 0.026628 | -0.004912 | 0.009350 | 0.004533 | 0.006151 | -0.019404 | 0.006333 | -0.022369 | 0.027367 | -0.016903 | 0.004112 | 0.005830 | 0.016400 | 0.000008 | -0.027925 | 0.003581 | 0.022002 | 0.009950 | -0.022025 | 1.000000 | -0.176635 | -0.042189 | -0.001646 | 0.009339 | -0.012567 | -0.000873 | 0.023270 | 0.005600 | 0.004251 | 0.001127 | 0.001010 | 0.012944 | -0.005194 | 0.013484 | 0.002048 | -0.006451 |
| position_s | 0.002843 | 0.024287 | -0.005419 | 0.017344 | -0.229465 | -0.049696 | -0.047300 | -0.216107 | -0.149165 | 0.199138 | -0.016222 | -0.025366 | -0.223701 | -0.087631 | 0.052212 | 0.054739 | -0.071683 | -0.079576 | -0.000245 | -0.005257 | -0.099449 | 0.007712 | -0.152560 | -0.053352 | -0.011100 | -0.005761 | -0.008537 | -0.008048 | -0.007973 | 0.000161 | -0.008439 | -0.027769 | 0.024974 | 0.017535 | -0.031147 | -0.011492 | -0.119468 | 0.038127 | -0.038385 | 0.082233 | 0.043078 | 0.036829 | -0.146498 | 0.092993 | -0.008536 | -0.106931 | -0.017855 | -0.000086 | 0.063058 | 0.013321 | -0.153596 | 0.008366 | -0.430682 | -0.176635 | 1.000000 | -0.824950 | 0.003426 | -0.010952 | 0.026668 | -0.001061 | -0.012233 | -0.001118 | -0.002719 | -0.027621 | -0.000332 | -0.005691 | 0.009943 | -0.019906 | 0.015607 | -0.028634 |
| position_t | -0.003383 | -0.023791 | 0.008971 | -0.005199 | 0.056249 | 0.004850 | 0.013992 | 0.067331 | 0.059616 | -0.053987 | 0.019112 | 0.013364 | 0.043463 | 0.027350 | -0.043589 | -0.038871 | 0.031878 | 0.039318 | 0.001552 | -0.006368 | 0.051205 | 0.011740 | 0.056921 | 0.028575 | 0.004954 | 0.003012 | 0.003188 | 0.002969 | 0.007495 | -0.000866 | 0.008036 | 0.051605 | -0.004254 | -0.006467 | 0.006941 | 0.004963 | 0.059076 | -0.004205 | 0.019644 | -0.063987 | -0.023873 | -0.013244 | 0.068571 | -0.038924 | 0.002201 | 0.050755 | 0.002541 | -0.003886 | -0.038252 | 0.009881 | 0.071864 | -0.016289 | -0.102867 | -0.042189 | -0.824950 | 1.000000 | -0.004155 | 0.004253 | -0.015708 | 0.000427 | 0.001112 | 0.001496 | 0.000347 | 0.025846 | 0.000013 | -0.006397 | -0.011097 | -0.002686 | -0.003209 | 0.023846 |
| plan_configuration_a | 0.001827 | -0.001566 | -0.000613 | -0.004617 | -0.009958 | 0.008640 | 0.007714 | -0.004534 | -0.004491 | -0.015076 | -0.002476 | -0.000549 | -0.003025 | 0.010310 | 0.010691 | 0.007781 | 0.005666 | 0.004944 | 0.013438 | -0.004118 | 0.006191 | -0.003131 | 0.009949 | 0.010962 | -0.000954 | -0.000591 | -0.001019 | -0.000427 | -0.000376 | -0.000292 | 0.001228 | -0.019583 | 0.014624 | 0.006293 | -0.016371 | 0.012622 | 0.009240 | -0.023290 | 0.006086 | 0.018686 | -0.000758 | -0.005099 | 0.010411 | -0.014846 | 0.021017 | 0.008116 | 0.008379 | 0.002051 | 0.016286 | -0.012772 | 0.014927 | -0.007623 | 0.001210 | -0.001646 | 0.003426 | -0.004155 | 1.000000 | -0.001099 | -0.151622 | -0.000286 | -0.000413 | -0.000376 | -0.000769 | -0.004649 | -0.001134 | -0.003708 | -0.003715 | 0.000948 | 0.002838 | -0.000721 |
| plan_configuration_c | 0.001227 | 0.004566 | -0.006873 | 0.003044 | 0.017159 | -0.000567 | 0.047805 | 0.028838 | -0.004518 | -0.028242 | -0.004871 | 0.005748 | 0.007701 | 0.024537 | -0.001714 | -0.008042 | 0.029699 | 0.029456 | 0.002801 | 0.001356 | 0.023331 | -0.003510 | 0.034423 | 0.012575 | 0.009559 | 0.022232 | 0.005493 | -0.000485 | -0.000427 | -0.000332 | -0.001011 | -0.023181 | -0.001048 | 0.002245 | -0.000093 | -0.002641 | 0.045622 | -0.026876 | 0.010621 | -0.005979 | -0.012838 | -0.010185 | 0.042268 | -0.029443 | -0.001562 | 0.043265 | -0.002969 | -0.000442 | -0.001417 | -0.017411 | 0.052338 | -0.005596 | 0.009609 | 0.009339 | -0.010952 | 0.004253 | -0.001099 | 1.000000 | -0.172212 | -0.000325 | -0.000470 | -0.000427 | -0.000873 | -0.005280 | -0.001288 | -0.004211 | 0.000851 | 0.001687 | -0.000547 | -0.001443 |
| plan_configuration_d | -0.001833 | -0.030270 | 0.022083 | -0.001975 | 0.025091 | 0.001872 | -0.089410 | 0.001975 | -0.023631 | 0.110799 | -0.003052 | -0.024325 | -0.031969 | -0.089713 | -0.001889 | -0.007401 | -0.041168 | -0.034788 | -0.016398 | -0.004938 | -0.015952 | 0.017456 | -0.034914 | -0.022548 | -0.010239 | -0.012523 | -0.005201 | -0.004293 | 0.000864 | -0.006370 | 0.001884 | 0.042804 | -0.001225 | -0.001997 | 0.002062 | -0.026348 | -0.056641 | 0.087701 | -0.053714 | -0.028694 | 0.037325 | 0.002562 | -0.075274 | 0.083084 | -0.009488 | -0.096085 | -0.012727 | -0.011462 | -0.059093 | 0.090022 | -0.057868 | -0.026696 | -0.018730 | -0.012567 | 0.026668 | -0.015708 | -0.151622 | -0.172212 | 1.000000 | -0.044780 | -0.064754 | -0.058854 | -0.120415 | -0.728247 | -0.177696 | -0.580764 | -0.020763 | -0.002206 | 0.016361 | 0.000610 |
| plan_configuration_f | 0.002184 | 0.001674 | 0.000590 | -0.001072 | 0.003528 | -0.001128 | 0.002054 | 0.003580 | 0.001543 | 0.000233 | -0.001733 | 0.001869 | 0.002487 | -0.002621 | 0.001332 | 0.001692 | 0.002199 | -0.001166 | 0.002304 | 0.001351 | -0.000611 | -0.000708 | 0.000603 | -0.000830 | -0.000282 | -0.000175 | -0.000301 | -0.000126 | -0.000111 | -0.000086 | -0.000659 | 0.000517 | 0.002435 | 0.000708 | -0.002566 | -0.000687 | -0.001890 | 0.001709 | -0.000374 | -0.000494 | 0.001428 | -0.000195 | -0.002364 | 0.002426 | -0.000406 | -0.002966 | -0.000142 | -0.000571 | -0.001582 | -0.000827 | -0.002021 | 0.003734 | 0.001669 | -0.000873 | -0.001061 | 0.000427 | -0.000286 | -0.000325 | -0.044780 | 1.000000 | -0.000122 | -0.000111 | -0.000227 | -0.001373 | -0.000335 | -0.001095 | -0.001351 | -0.000693 | -0.000407 | 0.003206 |
| plan_configuration_m | -0.002395 | 0.004048 | -0.007971 | -0.005090 | 0.015891 | 0.010677 | 0.022963 | 0.014457 | 0.005002 | -0.013597 | 0.000667 | 0.000348 | 0.012453 | 0.010442 | -0.001809 | -0.001979 | 0.002919 | 0.005249 | 0.000739 | -0.000181 | 0.008113 | -0.003485 | 0.016750 | 0.002022 | -0.000408 | 0.014959 | -0.000435 | -0.000182 | -0.000160 | -0.000125 | -0.000953 | -0.007063 | -0.000228 | -0.000769 | 0.000571 | 0.006778 | 0.006049 | -0.010028 | 0.006964 | 0.001645 | -0.003961 | -0.003991 | 0.014534 | -0.013836 | -0.000587 | 0.013496 | 0.005513 | -0.000826 | -0.000026 | -0.007301 | 0.013597 | 0.001806 | 0.011368 | 0.023270 | -0.012233 | 0.001112 | -0.000413 | -0.000470 | -0.064754 | -0.000122 | 1.000000 | -0.000160 | -0.000328 | -0.001985 | -0.000484 | -0.001583 | -0.001953 | -0.001002 | 0.002607 | -0.001354 |
| plan_configuration_n | 0.001045 | 0.000822 | 0.000415 | 0.003123 | 0.002651 | -0.001462 | 0.003568 | 0.002732 | -0.001530 | 0.000035 | -0.000532 | -0.001646 | -0.000744 | 0.000168 | 0.000956 | -0.001402 | -0.002547 | 0.003555 | 0.001126 | 0.000463 | -0.000253 | -0.000578 | 0.001273 | -0.001091 | -0.000370 | -0.000229 | -0.000395 | -0.000166 | -0.000146 | -0.000113 | -0.000866 | -0.001587 | -0.002946 | -0.000384 | 0.002883 | -0.000903 | 0.002347 | -0.003445 | 0.002684 | 0.001082 | -0.003932 | 0.003756 | 0.000843 | -0.002056 | -0.000534 | 0.000450 | 0.001484 | 0.004379 | -0.000715 | -0.000726 | 0.000373 | 0.001419 | -0.002799 | 0.005600 | -0.001118 | 0.001496 | -0.000376 | -0.000427 | -0.058854 | -0.000111 | -0.000160 | 1.000000 | -0.000298 | -0.001805 | -0.000440 | -0.001439 | 0.002642 | 0.003329 | -0.002676 | -0.001230 |
| plan_configuration_o | -0.000097 | 0.008179 | -0.005786 | -0.002804 | 0.001148 | -0.000917 | 0.035633 | 0.006637 | -0.002785 | -0.020852 | 0.005583 | 0.005924 | 0.004417 | 0.015334 | 0.005511 | 0.004727 | 0.010949 | 0.003083 | 0.000790 | 0.006891 | 0.006020 | -0.002049 | 0.010908 | 0.001234 | -0.000758 | 0.015898 | -0.000809 | -0.000339 | -0.000298 | -0.000232 | 0.000405 | -0.013941 | 0.000599 | -0.003602 | 0.001143 | -0.001847 | 0.007516 | -0.021993 | 0.011137 | 0.017800 | -0.011397 | 0.006376 | 0.010384 | -0.016803 | -0.001092 | 0.019134 | 0.003079 | 0.003480 | 0.005048 | -0.013823 | 0.009377 | 0.007709 | 0.002753 | 0.004251 | -0.002719 | 0.000347 | -0.000769 | -0.000873 | -0.120415 | -0.000227 | -0.000328 | -0.000298 | 1.000000 | -0.003692 | -0.000901 | -0.002944 | 0.007168 | 0.000210 | -0.005021 | -0.000976 |
| plan_configuration_q | -0.000037 | 0.009383 | -0.004326 | 0.001895 | -0.033170 | -0.001676 | 0.016648 | -0.016496 | 0.017312 | -0.050911 | 0.011763 | 0.012402 | 0.006362 | 0.029711 | -0.015985 | -0.005543 | 0.019966 | 0.025159 | 0.008796 | -0.006440 | -0.000735 | -0.014274 | 0.012321 | 0.006997 | 0.005695 | 0.001309 | 0.002333 | 0.005610 | 0.000370 | 0.006980 | -0.003361 | 0.008099 | 0.012777 | 0.006328 | -0.014693 | 0.016378 | 0.034844 | -0.032721 | 0.025341 | -0.008109 | -0.004166 | -0.020575 | 0.044115 | -0.037621 | 0.005895 | 0.035457 | 0.012565 | 0.009777 | 0.049952 | -0.062468 | 0.025185 | 0.018314 | 0.008344 | 0.001127 | -0.027621 | 0.025846 | -0.004649 | -0.005280 | -0.728247 | -0.001373 | -0.001985 | -0.001805 | -0.003692 | 1.000000 | -0.005449 | -0.017807 | 0.011788 | -0.003562 | -0.007498 | -0.000123 |
| plan_configuration_s | 0.002985 | 0.008559 | -0.005928 | -0.003399 | -0.003314 | -0.001011 | 0.019164 | -0.000126 | 0.005313 | -0.017476 | 0.003543 | 0.006057 | 0.001848 | 0.015960 | 0.011793 | 0.013446 | 0.006400 | 0.003806 | 0.003309 | -0.000112 | 0.003774 | -0.002694 | 0.007813 | 0.002583 | 0.002320 | -0.000693 | 0.002029 | -0.000500 | -0.000440 | 0.010875 | -0.001139 | -0.011272 | -0.002509 | 0.009568 | -0.002194 | 0.004363 | 0.006383 | -0.020475 | 0.005591 | 0.019803 | -0.012382 | 0.009240 | 0.007211 | -0.008318 | 0.000778 | 0.011660 | -0.001086 | 0.002836 | 0.002958 | -0.014100 | 0.007047 | 0.011398 | 0.000175 | 0.001010 | -0.000332 | 0.000013 | -0.001134 | -0.001288 | -0.177696 | -0.000335 | -0.000484 | -0.000440 | -0.000901 | -0.005449 | 1.000000 | -0.004345 | 0.002696 | 0.005685 | -0.004000 | -0.000579 |
| plan_configuration_u | 0.001353 | 0.033052 | -0.025532 | 0.003240 | -0.004863 | -0.003630 | 0.096170 | 0.006197 | 0.019020 | -0.098556 | -0.009556 | 0.020615 | 0.041119 | 0.094121 | 0.015926 | 0.014730 | 0.029206 | 0.014150 | 0.010570 | 0.015422 | 0.015722 | -0.007840 | 0.023424 | 0.021292 | 0.007000 | 0.008055 | 0.004088 | 0.000748 | -0.001439 | -0.001120 | 0.001520 | -0.062317 | -0.016605 | -0.009001 | 0.019461 | 0.019649 | 0.031089 | -0.079708 | 0.048582 | 0.044888 | -0.046337 | 0.021587 | 0.049853 | -0.071965 | 0.003618 | 0.092792 | 0.003297 | 0.004714 | 0.031227 | -0.056300 | 0.040403 | 0.019857 | 0.016040 | 0.012944 | -0.005691 | -0.006397 | -0.003708 | -0.004211 | -0.580764 | -0.001095 | -0.001583 | -0.001439 | -0.002944 | -0.017807 | -0.004345 | 1.000000 | 0.018570 | 0.005391 | -0.016370 | 0.000167 |
| legal_ownership_status_a | -0.000714 | 0.100393 | -0.025893 | 0.034199 | -0.096866 | -0.023928 | -0.028249 | -0.078852 | -0.030176 | -0.138746 | -0.008384 | 0.009067 | -0.019224 | 0.076331 | 0.102886 | 0.080457 | -0.001817 | -0.009482 | 0.016991 | -0.009021 | 0.017540 | -0.006178 | 0.038265 | 0.003079 | -0.002769 | -0.002792 | 0.004158 | 0.001874 | 0.000433 | -0.001381 | -0.002696 | -0.082705 | 0.001053 | 0.025052 | -0.012736 | 0.034576 | 0.002195 | -0.184511 | 0.035575 | 0.241083 | -0.019719 | 0.028351 | -0.012519 | -0.035542 | 0.002573 | 0.051617 | -0.007276 | 0.016687 | 0.099468 | -0.087368 | -0.011618 | 0.023399 | 0.002070 | -0.005194 | 0.009943 | -0.011097 | -0.003715 | 0.000851 | -0.020763 | -0.001351 | -0.001953 | 0.002642 | 0.007168 | 0.011788 | 0.002696 | 0.018570 | 1.000000 | -0.011083 | -0.749134 | -0.014976 |
| legal_ownership_status_r | -0.002099 | 0.007874 | -0.027011 | -0.001309 | -0.008236 | -0.001055 | -0.015343 | -0.008693 | 0.025108 | -0.047872 | -0.004939 | -0.006450 | 0.027949 | 0.004875 | 0.011204 | 0.032627 | 0.005137 | -0.000967 | 0.009238 | 0.001424 | 0.006853 | 0.004624 | 0.008651 | -0.005100 | -0.000643 | -0.001432 | 0.000662 | -0.001034 | -0.000910 | -0.000708 | -0.001822 | -0.002591 | 0.008232 | 0.007279 | -0.010971 | 0.004004 | 0.006537 | -0.036539 | 0.005716 | 0.044787 | -0.011800 | 0.005416 | 0.012838 | -0.001527 | 0.001310 | 0.005251 | -0.004113 | 0.003574 | 0.025998 | -0.017559 | 0.015616 | -0.011203 | 0.036522 | 0.013484 | -0.019906 | -0.002686 | 0.000948 | 0.001687 | -0.002206 | -0.000693 | -0.001002 | 0.003329 | 0.000210 | -0.003562 | 0.005685 | 0.005391 | -0.011083 | 1.000000 | -0.384233 | -0.007681 |
| legal_ownership_status_v | 0.002637 | -0.082055 | 0.023189 | -0.018533 | 0.073273 | 0.016697 | 0.029261 | 0.063050 | -0.004681 | 0.126641 | 0.005545 | -0.001186 | -0.000846 | -0.052481 | -0.072237 | -0.071334 | 0.002867 | 0.012230 | -0.014078 | 0.001646 | -0.024677 | -0.002232 | -0.040682 | -0.000846 | 0.002706 | -0.001620 | -0.001649 | -0.000271 | 0.000688 | 0.001844 | -0.005561 | 0.048238 | 0.001020 | -0.019143 | 0.008059 | -0.053635 | -0.000286 | 0.148767 | -0.024762 | -0.191257 | 0.006037 | -0.011570 | 0.008920 | 0.022209 | -0.002839 | -0.036485 | 0.010114 | -0.015990 | -0.082096 | 0.070879 | 0.003963 | -0.014555 | -0.025076 | 0.002048 | 0.015607 | -0.003209 | 0.002838 | -0.000547 | 0.016361 | -0.000407 | 0.002607 | -0.002676 | -0.005021 | -0.007498 | -0.004000 | -0.016370 | -0.749134 | -0.384233 | 1.000000 | -0.519193 |
| legal_ownership_status_w | -0.002361 | 0.004646 | 0.013579 | -0.013100 | 0.007046 | 0.003642 | -0.003113 | 0.000837 | 0.033165 | -0.003726 | 0.005245 | -0.005920 | 0.008238 | -0.014208 | -0.019786 | -0.005400 | -0.006599 | -0.008668 | -0.004734 | 0.008729 | 0.016117 | 0.009561 | 0.015197 | 0.000983 | -0.000642 | 0.008086 | -0.003335 | -0.001397 | -0.001230 | -0.000957 | 0.015623 | 0.029555 | -0.009533 | -0.005289 | 0.011230 | 0.048189 | -0.007457 | 0.011694 | -0.008615 | -0.018933 | 0.025599 | -0.022803 | -0.008395 | 0.010237 | 0.000674 | -0.009194 | -0.005510 | 0.003493 | -0.007432 | 0.004911 | -0.002459 | 0.002214 | 0.016882 | -0.006451 | -0.028634 | 0.023846 | -0.000721 | -0.001443 | 0.000610 | 0.003206 | -0.001354 | -0.001230 | -0.000976 | -0.000123 | -0.000579 | 0.000167 | -0.014976 | -0.007681 | -0.519193 | 1.000000 |
# Obtain and display the Pearson correlation of every column with damage_grade only
# (one column of the full correlation matrix, sorted from most positive to most negative).
cm = sns.light_palette("blue", as_cmap=True)
data_corr = finalDataset.corr(method="pearson")
damage_corr = data_corr['damage_grade']
table = damage_corr.sort_values(ascending=False).to_frame()
# Shade each cell with the blue palette so stronger positive correlations stand out.
tb = table.style.background_gradient(cmap=cm)
tb
| damage_grade | |
|---|---|
| damage_grade | 1.000000 |
| foundation_type_r | 0.343355 |
| has_superstructure_mud_mortar_stone | 0.291325 |
| ground_floor_type_f | 0.234755 |
| other_floor_type_q | 0.166759 |
| count_floors_pre_eq | 0.122308 |
| roof_type_n | 0.078612 |
| roof_type_q | 0.074460 |
| has_superstructure_stone_flag | 0.066039 |
| count_families | 0.056151 |
| has_superstructure_adobe_mud | 0.055314 |
| position_t | 0.051605 |
| legal_ownership_status_v | 0.048238 |
| height_percentage | 0.048130 |
| other_floor_type_x | 0.043188 |
| geo_level_2_id | 0.043161 |
| plan_configuration_d | 0.042804 |
| legal_ownership_status_w | 0.029555 |
| age | 0.029273 |
| land_surface_condition_o | 0.015078 |
| has_superstructure_mud_mortar_brick | 0.014561 |
| has_secondary_use_agriculture | 0.011309 |
| land_surface_condition_n | 0.008530 |
| plan_configuration_q | 0.008099 |
| geo_level_3_id | 0.007932 |
| ground_floor_type_x | 0.006606 |
| building_id | 0.001063 |
| plan_configuration_f | 0.000517 |
| plan_configuration_n | -0.001587 |
| has_secondary_use_use_police | -0.001656 |
| legal_ownership_status_r | -0.002591 |
| position_o | -0.004787 |
| plan_configuration_m | -0.007063 |
| has_secondary_use_health_post | -0.008543 |
| has_secondary_use_gov_office | -0.009378 |
| has_secondary_use_industry | -0.011024 |
| plan_configuration_s | -0.011272 |
| has_secondary_use_school | -0.011692 |
| plan_configuration_o | -0.013941 |
| land_surface_condition_t | -0.014909 |
| foundation_type_h | -0.016202 |
| has_secondary_use_other | -0.016334 |
| ground_floor_type_z | -0.016728 |
| ground_floor_type_m | -0.019345 |
| plan_configuration_a | -0.019583 |
| plan_configuration_c | -0.023181 |
| position_s | -0.027769 |
| has_secondary_use_institution | -0.028728 |
| has_superstructure_other | -0.030224 |
| position_j | -0.032275 |
| has_superstructure_cement_mortar_stone | -0.060295 |
| plan_configuration_u | -0.062317 |
| has_superstructure_bamboo | -0.063051 |
| has_superstructure_timber | -0.069852 |
| geo_level_1_id | -0.072347 |
| has_secondary_use | -0.079630 |
| legal_ownership_status_a | -0.082705 |
| has_secondary_use_rental | -0.083754 |
| has_secondary_use_hotel | -0.097942 |
| area_percentage | -0.125221 |
| other_floor_type_j | -0.135668 |
| foundation_type_u | -0.139452 |
| has_superstructure_rc_non_engineered | -0.158145 |
| foundation_type_w | -0.173328 |
| has_superstructure_rc_engineered | -0.179014 |
| other_floor_type_s | -0.226820 |
| has_superstructure_cement_mortar_brick | -0.254131 |
| foundation_type_i | -0.263901 |
| roof_type_x | -0.280106 |
| ground_floor_type_v | -0.318720 |
The following correlations should be noted:
1) The number of floors and the height percentage have a high positive correlation. This is an expected result as the building height increases with the number of floors
2) The highest positive correlations of damage grade with other features are for buildings with foundation type "r", ground floor type "f", and a superstructure of mud mortar stone. However, these correlation values are less than 0.5 and are not enough to draw sufficient conclusions from.
3) There is some negative correlation between the damage grade and foundation_type "i", roof_type "x", and ground_floor type "v", but once again these are not strong enough to draw useful conclusions.
4) Some correlation patterns can be seen within the encoded columns of the categorical data. However, this does not provide any useful information for the research questions.
# Collect the names of all columns whose name starts with the land-surface-condition prefix.
surface_prefix = 'land_surface_condition'
landSurfaceCols = [name for name in finalDataset.columns if name.startswith(surface_prefix)]
landSurfaceCols
['land_surface_condition_n', 'land_surface_condition_o', 'land_surface_condition_t']
import matplotlib
print(matplotlib.__version__)
3.5.1
# One count plot per land surface indicator column, split by damage grade.
for surface_col in landSurfaceCols:
    plt.figure(figsize=(15, 5))
    ax = sns.countplot(
        x=finalDataset[surface_col],
        hue=finalDataset.damage_grade,
        palette="magma",
    )
    # Annotate every bar with its count.
    for bars in ax.containers:
        ax.bar_label(bars)
    plt.title(surface_col + " VS Damage", fontweight="bold")
    plt.legend(["Low damage", "Avg damage", "High damage"])
    plt.show()
# Print the 0/1 split of each land surface indicator, then the total row count.
for surface_col in landSurfaceCols:
    split = finalDataset[surface_col].value_counts()
    print(split)
    print(split.sum())
0 225073 1 35528 Name: land_surface_condition_n, dtype: int64 260601 0 252285 1 8316 Name: land_surface_condition_o, dtype: int64 260601 1 216757 0 43844 Name: land_surface_condition_t, dtype: int64 260601
# Pie chart of the relative frequency of each land surface condition.
land_surface_share = mergedDataset['land_surface_condition'].value_counts(normalize=True)
land_surface_share.plot.pie(autopct="%.1f")
<AxesSubplot:ylabel='land_surface_condition'>
# Percentage of buildings at each damage grade for every land surface
# condition indicator column (computed over the rows where the indicator is 1).
damage_labels = {1: 'Low Damage', 2: 'Medium Damage', 3: 'Complete Destruction'}
records = []
for col in landSurfaceCols:
    # Count grades 1-3 among the buildings that have this condition.
    grade_counts = (
        finalDataset.loc[finalDataset[col] == 1, 'damage_grade']
        .value_counts()
        .reindex(list(damage_labels), fill_value=0)
    )
    # Series division gives NaN instead of raising ZeroDivisionError when no
    # building has this indicator set.
    shares = grade_counts / grade_counts.sum() * 100
    records.append({'landsurfacecondition': col, **shares.rename(damage_labels).to_dict()})
# Building the frame in one go keeps the percentage columns float64 rather
# than the object dtype produced by cell-by-cell .loc assignment.
landSurfaceDataset = pd.DataFrame(
    records,
    columns=['landsurfacecondition', 'Low Damage', 'Medium Damage', 'Complete Destruction'],
)
landSurfaceDataset.head()
| landsurfacecondition | Low Damage | Medium Damage | Complete Destruction | |
|---|---|---|---|---|
| 0 | land_surface_condition_n | 7.194326 | 60.470615 | 32.33506 |
| 1 | land_surface_condition_o | 7.239057 | 56.613757 | 36.147186 |
| 2 | land_surface_condition_t | 10.133929 | 56.315136 | 33.550935 |
# Rank the land surface conditions from the highest to the lowest share of
# completely destroyed buildings; reset_index keeps the old row label as 'index'.
landSurfaceDatasetSorted = (
    landSurfaceDataset
    .sort_values(by='Complete Destruction', ascending=False)
    .reset_index()
)
landSurfaceDatasetSorted
| index | landsurfacecondition | Low Damage | Medium Damage | Complete Destruction | |
|---|---|---|---|---|---|
| 0 | 1 | land_surface_condition_o | 7.239057 | 56.613757 | 36.147186 |
| 1 | 2 | land_surface_condition_t | 10.133929 | 56.315136 | 33.550935 |
| 2 | 0 | land_surface_condition_n | 7.194326 | 60.470615 | 32.33506 |
# Readable tick labels, e.g. 'land_surface_condition_o' -> 'Condition O'.
formattedList = [
    ' '.join(name.split('_')[2:]).title()
    for name in landSurfaceDatasetSorted.landsurfacecondition
]
# Bar chart of the completely-destroyed share per land condition type.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(
    data=landSurfaceDatasetSorted,
    x='landsurfacecondition',
    y='Complete Destruction',
    palette="magma",
    ax=ax,
)
ax.set_xlabel('Land Condition Type', fontsize=25)
ax.set_ylabel('Percent', fontsize=25)
plt.yticks(fontsize=20)
ax.set_xticklabels(formattedList, rotation=90, ha='right', fontsize=20)
ax.set_title('Percent of Buildings Completely Destroyed (By Land Condition Type)', fontsize=30);
# Bar chart of the medium-damage share per land condition type.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(
    data=landSurfaceDatasetSorted,
    x='landsurfacecondition',
    y='Medium Damage',
    palette="magma",
    ax=ax,
)
ax.set_xlabel('Land Condition Type', fontsize=25)
ax.set_ylabel('Percent', fontsize=25)
plt.yticks(fontsize=20)
ax.set_xticklabels(formattedList, rotation=90, ha='right', fontsize=20)
ax.set_title('Percent of Buildings with Medium Damage (By Land Condition Type)', fontsize=30)
Text(0.5, 1.0, 'Percent of Buildings with Medium Damage (By Land Condition Type)')
# Bar chart of the low-damage share per land condition type.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(
    data=landSurfaceDatasetSorted,
    x='landsurfacecondition',
    y='Low Damage',
    palette="magma",
    ax=ax,
)
ax.set_xlabel('Land Condition Type', fontsize=25)
ax.set_ylabel('Percent', fontsize=25)
plt.yticks(fontsize=20)
ax.set_xticklabels(formattedList, rotation=90, ha='right', fontsize=20)
ax.set_title('Percent of Buildings with Low Damage (By Land Condition Type)', fontsize=30)
Text(0.5, 1.0, 'Percent of Buildings with Low Damage (By Land Condition Type)')
From the visualizations above, at first glance it might seem that buildings with a land surface condition type "t" have the highest count of buildings that have sustained medium damage or been completely destroyed. However, looking through the value counts shows that this is due to the large share of buildings having condition "t". From the value counts it can be seen that 35528 buildings have a land surface condition of "n", 8316 have a condition of "o" and 216757 have a land surface condition of "t".
Due to the varying quantities of data for each category, the percentage of buildings for each damage grade was calculated and visualized in order to get a better understanding. From this it can be seen that buildings with land type "o" have a slightly higher chance (36.15%) of being completely destroyed than the other two land types.
It can also be seen that land type "t" has a higher percentage of buildings with low damage (10.13%) than the other types. Since the largest number of buildings have a "t" ground condition, this indicates that "t" ground conditions offer better protection from medium and high earthquake damage.
#analyze superstructure vs damage grade
superstructureCols = [
    name for name in finalDataset.columns
    if name.startswith('has_superstructure')
]
# One count plot per superstructure indicator column, split by damage grade.
for structure_col in superstructureCols:
    plt.figure(figsize=(15, 5))
    ax = sns.countplot(
        x=finalDataset[structure_col],
        hue=finalDataset.damage_grade,
        palette="magma",
    )
    # Annotate every bar with its count.
    for bars in ax.containers:
        ax.bar_label(bars)
    plt.title(structure_col + " VS Damage", fontweight="bold")
    plt.legend(["Low damage", "Avg damage", "High damage"])
    plt.show()
# Print the 0/1 split of each superstructure indicator, then the total row count.
for structure_col in superstructureCols:
    split = finalDataset[structure_col].value_counts()
    print(split)
    print(split.sum())
0 237500 1 23101 Name: has_superstructure_adobe_mud, dtype: int64 260601 1 198561 0 62040 Name: has_superstructure_mud_mortar_stone, dtype: int64 260601 0 251654 1 8947 Name: has_superstructure_stone_flag, dtype: int64 260601 0 255849 1 4752 Name: has_superstructure_cement_mortar_stone, dtype: int64 260601 0 242840 1 17761 Name: has_superstructure_mud_mortar_brick, dtype: int64 260601 0 240986 1 19615 Name: has_superstructure_cement_mortar_brick, dtype: int64 260601 0 194151 1 66450 Name: has_superstructure_timber, dtype: int64 260601 0 238447 1 22154 Name: has_superstructure_bamboo, dtype: int64 260601 0 249502 1 11099 Name: has_superstructure_rc_non_engineered, dtype: int64 260601 0 256468 1 4133 Name: has_superstructure_rc_engineered, dtype: int64 260601 0 256696 1 3905 Name: has_superstructure_other, dtype: int64 260601
# Number of buildings that have each superstructure type (indicator == 1).
# Built in one step so 'frequency' is an integer column instead of the object
# dtype that row-by-row .loc assignment produces.
superstructureCountDataset = pd.DataFrame({
    'superstructure': superstructureCols,
    'frequency': [int((finalDataset[col] == 1).sum()) for col in superstructureCols],
})
superstructureCountDataset
| superstructure | frequency | |
|---|---|---|
| 0 | has_superstructure_adobe_mud | 23101 |
| 1 | has_superstructure_mud_mortar_stone | 198561 |
| 2 | has_superstructure_stone_flag | 8947 |
| 3 | has_superstructure_cement_mortar_stone | 4752 |
| 4 | has_superstructure_mud_mortar_brick | 17761 |
| 5 | has_superstructure_cement_mortar_brick | 19615 |
| 6 | has_superstructure_timber | 66450 |
| 7 | has_superstructure_bamboo | 22154 |
| 8 | has_superstructure_rc_non_engineered | 11099 |
| 9 | has_superstructure_rc_engineered | 4133 |
| 10 | has_superstructure_other | 3905 |
# Pie chart of how often each superstructure type occurs.
superstructureCountDataset['frequency'].plot.pie(
    autopct="%.1f",
    labels=superstructureCountDataset['superstructure'],
    figsize=(10, 10),
)
<AxesSubplot:ylabel='frequency'>
# Percentage of buildings at each damage grade for every superstructure
# indicator column (computed over the rows where the indicator is 1).
damage_labels = {1: 'Low Damage', 2: 'Medium Damage', 3: 'Complete Destruction'}
records = []
for col in superstructureCols:
    # Count grades 1-3 among the buildings that have this superstructure.
    grade_counts = (
        finalDataset.loc[finalDataset[col] == 1, 'damage_grade']
        .value_counts()
        .reindex(list(damage_labels), fill_value=0)
    )
    # Series division gives NaN instead of raising ZeroDivisionError when no
    # building has this indicator set.
    shares = grade_counts / grade_counts.sum() * 100
    records.append({'superstructure': col, **shares.rename(damage_labels).to_dict()})
# Building the frame in one go keeps the percentage columns float64 rather
# than the object dtype produced by cell-by-cell .loc assignment.
superstructureDataset = pd.DataFrame(
    records,
    columns=['superstructure', 'Low Damage', 'Medium Damage', 'Complete Destruction'],
)
superstructureDataset.head()
| superstructure | Low Damage | Medium Damage | Complete Destruction | |
|---|---|---|---|---|
| 0 | has_superstructure_adobe_mud | 2.571317 | 60.179213 | 37.24947 |
| 1 | has_superstructure_mud_mortar_stone | 4.411742 | 57.386395 | 38.201862 |
| 2 | has_superstructure_stone_flag | 2.034201 | 50.676204 | 47.289594 |
| 3 | has_superstructure_cement_mortar_stone | 17.445286 | 68.350168 | 14.204545 |
| 4 | has_superstructure_mud_mortar_brick | 3.507685 | 65.863409 | 30.628906 |
superstructureDataset.value_counts()
superstructure Low Damage Medium Damage Complete Destruction has_superstructure_adobe_mud 2.571317 60.179213 37.249470 1 has_superstructure_bamboo 12.814842 63.198519 23.986639 1 has_superstructure_cement_mortar_brick 36.222279 58.225848 5.551874 1 has_superstructure_cement_mortar_stone 17.445286 68.350168 14.204545 1 has_superstructure_mud_mortar_brick 3.507685 65.863409 30.628906 1 has_superstructure_mud_mortar_stone 4.411742 57.386395 38.201862 1 has_superstructure_other 16.798976 57.567222 25.633803 1 has_superstructure_rc_engineered 64.263247 33.922090 1.814662 1 has_superstructure_rc_non_engineered 34.750878 52.545274 12.703847 1 has_superstructure_stone_flag 2.034201 50.676204 47.289594 1 has_superstructure_timber 11.500376 60.477050 28.022573 1 dtype: int64
# Rank superstructure types by share of completely destroyed buildings.
superstructureDatasetSorted = (
    superstructureDataset
    .sort_values(by='Complete Destruction', ascending=False)
    .reset_index()
)
# Readable tick labels, e.g. 'has_superstructure_adobe_mud' -> 'Adobe Mud'.
formattedList = [
    ' '.join(name.split('_')[2:]).title()
    for name in superstructureDatasetSorted.superstructure
]
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(
    data=superstructureDatasetSorted,
    x='superstructure',
    y='Complete Destruction',
    palette="magma",
    ax=ax,
)
ax.set_xlabel('SuperStructure Type', fontsize=25)
ax.set_ylabel('Percent', fontsize=25)
plt.yticks(fontsize=20)
ax.set_xticklabels(formattedList, rotation=90, ha='right', fontsize=20)
ax.set_title('Percent of Buildings Completely Destroyed (By Superstructure Type)', fontsize=30);
# Bar chart of the medium-damage share per superstructure type.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(
    data=superstructureDatasetSorted,
    x='superstructure',
    y='Medium Damage',
    palette="magma",
    ax=ax,
)
ax.set_xlabel('SuperStructure Type', fontsize=25)
ax.set_ylabel('Percent', fontsize=25)
plt.yticks(fontsize=20)
ax.set_xticklabels(formattedList, rotation=90, ha='right', fontsize=20)
ax.set_title('Percent Medium Damage(By Superstructure Type)', fontsize=30);
# Bar chart of the low-damage share per superstructure type.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(
    data=superstructureDatasetSorted,
    x='superstructure',
    y='Low Damage',
    palette="magma",
    ax=ax,
)
ax.set_xlabel('SuperStructure Type', fontsize=25)
ax.set_ylabel('Percent', fontsize=25)
plt.yticks(fontsize=20)
ax.set_xticklabels(formattedList, rotation=90, ha='right', fontsize=20)
ax.set_title('Percent Low Damage (By Superstructure Type)', fontsize=30);
From the pie chart it can be seen that over 50 percent of the buildings have a "mud mortar stone" superstructure with the second highest frequency of 17.5% for timber. Since the number of buildings with different superstructures varies greatly with each type, the percentage damage for the building for each type was investigated.
"RC engineered" superstructures seem to offer the best protection, with the lowest percentage of buildings with medium or complete damage and the majority of the buildings only sustaining low damage. However, only 1.1% of buildings in the dataset have this type of superstructure.
Other superstructures that have a high risk of being completely destroyed or sustaining medium damage are the "stone flag", "mud mortar stone" and "adobe mud" types. This is especially concerning because a large percentage of buildings are constructed with a "mud mortar stone" superstructure. Another observation is the high 47.29% complete destruction rate for "stone flag".
The types "mud mortar brick", "timber", "bamboo", "cement mortar stone" and "other" seem to offer a greater level of protection, with the majority of buildings suffering medium damage instead of being completely destroyed.
# Names of the one-hot encoded ground floor type columns.
groundFloorTypeCols = [
    name for name in finalDataset.columns
    if name.startswith('ground_floor_type')
]
groundFloorTypeCols
['ground_floor_type_f', 'ground_floor_type_m', 'ground_floor_type_v', 'ground_floor_type_x', 'ground_floor_type_z']
# One count plot per ground floor type indicator column, split by damage grade.
for floor_col in groundFloorTypeCols:
    plt.figure(figsize=(15, 5))
    ax = sns.countplot(
        x=finalDataset[floor_col],
        hue=finalDataset.damage_grade,
        palette="magma",
    )
    # Annotate every bar with its count.
    for bars in ax.containers:
        ax.bar_label(bars)
    plt.title(floor_col + " VS Damage", fontweight="bold")
    plt.legend(["Low damage", "Avg damage", "High damage"])
    plt.show()
# Print the 0/1 split of each ground floor indicator, then the total row count.
for floor_col in groundFloorTypeCols:
    split = finalDataset[floor_col].value_counts()
    print(split)
    print(split.sum())
1 209619 0 50982 Name: ground_floor_type_f, dtype: int64 260601 0 260093 1 508 Name: ground_floor_type_m, dtype: int64 260601 0 236008 1 24593 Name: ground_floor_type_v, dtype: int64 260601 0 235724 1 24877 Name: ground_floor_type_x, dtype: int64 260601 0 259597 1 1004 Name: ground_floor_type_z, dtype: int64 260601
# Pie chart of the relative frequency of each ground floor type.
ground_floor_share = mergedDataset['ground_floor_type'].value_counts(normalize=True)
ground_floor_share.plot.pie(autopct="%.1f", figsize=(10, 10))
<AxesSubplot:ylabel='ground_floor_type'>
It can be seen from the pie chart above that ground floor type "f" makes up 80.4% of the buildings
# Percentage of buildings at each damage grade for every ground floor type
# indicator column (computed over the rows where the indicator is 1).
damage_labels = {1: 'Low Damage', 2: 'Medium Damage', 3: 'Complete Destruction'}
records = []
for col in groundFloorTypeCols:
    # Count grades 1-3 among the buildings that have this ground floor type.
    grade_counts = (
        finalDataset.loc[finalDataset[col] == 1, 'damage_grade']
        .value_counts()
        .reindex(list(damage_labels), fill_value=0)
    )
    # Series division gives NaN instead of raising ZeroDivisionError when no
    # building has this indicator set.
    shares = grade_counts / grade_counts.sum() * 100
    records.append({'groundfloortype': col, **shares.rename(damage_labels).to_dict()})
# Building the frame in one go keeps the percentage columns float64 rather
# than the object dtype produced by cell-by-cell .loc assignment.
groundFloorTypeDataset = pd.DataFrame(
    records,
    columns=['groundfloortype', 'Low Damage', 'Medium Damage', 'Complete Destruction'],
)
groundFloorTypeDataset.head()
| groundfloortype | Low Damage | Medium Damage | Complete Destruction | |
|---|---|---|---|---|
| 0 | ground_floor_type_f | 5.950796 | 57.188041 | 36.861162 |
| 1 | ground_floor_type_m | 17.716535 | 67.519685 | 14.76378 |
| 2 | ground_floor_type_v | 41.918432 | 52.74265 | 5.338918 |
| 3 | ground_floor_type_x | 8.248583 | 58.431483 | 33.319934 |
| 4 | ground_floor_type_z | 19.820717 | 52.988048 | 27.191235 |
# Sort by share of completely destroyed buildings (highest first) for clearer
# plots; reset_index keeps the old row label as 'index'.
groundFloorTypeDatasetSorted = (
    groundFloorTypeDataset
    .sort_values(by='Complete Destruction', ascending=False)
    .reset_index()
)
groundFloorTypeDatasetSorted
| index | groundfloortype | Low Damage | Medium Damage | Complete Destruction | |
|---|---|---|---|---|---|
| 0 | 0 | ground_floor_type_f | 5.950796 | 57.188041 | 36.861162 |
| 1 | 3 | ground_floor_type_x | 8.248583 | 58.431483 | 33.319934 |
| 2 | 4 | ground_floor_type_z | 19.820717 | 52.988048 | 27.191235 |
| 3 | 1 | ground_floor_type_m | 17.716535 | 67.519685 | 14.76378 |
| 4 | 2 | ground_floor_type_v | 41.918432 | 52.74265 | 5.338918 |
# Readable tick labels, e.g. 'ground_floor_type_f' -> 'Type F'.
formattedList = [
    ' '.join(name.split('_')[2:]).title()
    for name in groundFloorTypeDatasetSorted.groundfloortype
]
# Bar chart of the completely-destroyed share per ground floor type.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(
    data=groundFloorTypeDatasetSorted,
    x='groundfloortype',
    y='Complete Destruction',
    palette="magma",
    ax=ax,
)
ax.set_xlabel('Ground Floor Type', fontsize=25)
ax.set_ylabel('Percent', fontsize=25)
plt.yticks(fontsize=20)
ax.set_xticklabels(formattedList, rotation=90, ha='right', fontsize=20)
ax.set_title('Percent of Buildings Completely Destroyed (By Ground Floor Type)', fontsize=30);
# Bar chart of the medium-damage share per ground floor type.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(
    data=groundFloorTypeDatasetSorted,
    x='groundfloortype',
    y='Medium Damage',
    palette="magma",
    ax=ax,
)
ax.set_xlabel('Ground Floor Type', fontsize=25)
ax.set_ylabel('Percent', fontsize=25)
plt.yticks(fontsize=20)
ax.set_xticklabels(formattedList, rotation=90, ha='right', fontsize=20)
ax.set_title('Percent of Buildings with Medium Damage (By Ground Floor Type)', fontsize=30);
# Bar chart of the low-damage share per ground floor type.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(
    data=groundFloorTypeDatasetSorted,
    x='groundfloortype',
    y='Low Damage',
    palette="magma",
    ax=ax,
)
ax.set_xlabel('Ground Floor Type', fontsize=25)
ax.set_ylabel('Percent', fontsize=25)
plt.yticks(fontsize=20)
ax.set_xticklabels(formattedList, rotation=90, ha='right', fontsize=20)
ax.set_title('Percent of Buildings with Low Damage (By Ground Floor Type)', fontsize=30);
It can be seen that 80.4% of the buildings have ground floor type "f". From the percent damage analysis it can also be seen that this same type is very susceptible to medium or large amounts of damage in the case of an earthquake
Type "x" and Type "z" are also susceptible to high and medium levels of damage, but not to the extent of type "f", and a considerably smaller percentage of the buildings have these types.
Type "m" and "v" seem to be the most resistant to complete destruction, but a considerably large percentage of these buildings suffer medium damage
#visualize distribution
numeric_cols = ['age', 'area_percentage', 'height_percentage', 'count_floors_pre_eq']
fig, axs = plt.subplots(1, 4, figsize=(30, 8))
# One histogram per numeric feature, left to right.
for position, col_name in enumerate(numeric_cols):
    sns.histplot(finalDataset[col_name], ax=axs[position]);
#plot box plots and scan for outliers
numeric_cols = ['age', 'area_percentage', 'height_percentage', 'count_floors_pre_eq']
fig, axs = plt.subplots(1, 4, figsize=(30, 8))
# One box plot per numeric feature, left to right.
for position, col_name in enumerate(numeric_cols):
    sns.boxplot(x=finalDataset[col_name], ax=axs[position])
# Intermediary dataset: for every distinct height_percentage value, the
# percentage of buildings that fall into each of the three damage grades.
grade_columns = {
    1: 'Percent Low Damage',
    2: 'Percent Medium Damage',
    3: 'Percent Complete Destruction',
}
# Single pass over the data: rows are height values, columns are damage grades.
grade_counts = (
    pd.crosstab(finalDataset['height_percentage'], finalDataset['damage_grade'])
    .reindex(columns=list(grade_columns), fill_value=0)
)
# Denominator: every building that has the given height percentage.
buildings_per_height = finalDataset['height_percentage'].value_counts()
heightDataset = (
    grade_counts.div(buildings_per_height, axis=0)
    .mul(100)
    .rename(columns=grade_columns)
    # Keep the rows in order of first appearance in finalDataset.
    .reindex(finalDataset['height_percentage'].unique())
    .rename_axis(index='Height Percentage', columns=None)
    .reset_index()
)
#Plotting height percentage vs percent complete destruction
fig, ax = plt.subplots(figsize=(10, 10))
height_values = heightDataset['Height Percentage']
destroyed_share = heightDataset['Percent Complete Destruction'].astype(float)
sns.lineplot(x=height_values, y=destroyed_share, color='xkcd:blue', ax=ax)
ax.set(
    xlabel='Building Height',
    ylabel='Percent of Buildings with Complete Destruction',
    title='% Complete Destruction by Building Height Percentage',
);
heightDataset['Height Percentage'].value_counts()
2 1 16 1 31 1 28 1 26 1 25 1 24 1 23 1 21 1 20 1 19 1 18 1 17 1 15 1 3 1 14 1 13 1 12 1 11 1 10 1 9 1 8 1 7 1 6 1 5 1 4 1 32 1 Name: Height Percentage, dtype: int64
# Density of height_percentage, with the x axis limited to 0-40.
# plt.plot() with no data ignores figsize; plt.figure() is what sizes the figure.
plt.figure(figsize=(25, 15))
plt.xlim(0, 40)
# fill= replaces shade=, which seaborn documents as a deprecated alias.
sns.kdeplot(x=finalDataset['height_percentage'], fill=True)
<AxesSubplot:xlabel='height_percentage', ylabel='Density'>
# Coarse 5-bin histogram of the height percentage values.
height_values = finalDataset['height_percentage']
plt.hist(height_values, bins=5, edgecolor='black')
plt.xlabel('height percentage')
plt.ylabel('count')
Text(0, 0.5, 'count')
len(finalDataset[finalDataset.height_percentage > 20])
111
# Only 111 entries have a height percentage above 20, so keep the aggregated
# rows up to 20 for plotting.
within_range = heightDataset['Height Percentage'] <= 20
normalisedHeightDataset = heightDataset[within_range]
# Row counts before and after applying the same cut-off to the raw data.
len(finalDataset), len(finalDataset[finalDataset.height_percentage <= 20])
(260601, 260490)
# Complete-destruction share against height percentage, restricted to <= 20.
fig, ax = plt.subplots(figsize=(10, 10))
height_values = normalisedHeightDataset['Height Percentage']
destroyed_share = normalisedHeightDataset['Percent Complete Destruction'].astype(float)
sns.lineplot(x=height_values, y=destroyed_share, ax=ax)
ax.set(
    xlabel='Normalized Height of the Building Footprint',
    title='% Complete Destruction by Normalized Height of Building Footprint',
);
The same operations were carried out for low and medium percent damages
#Plotting height percentage vs medium damage
fig, ax = plt.subplots(figsize=(10, 10))
height_values = heightDataset['Height Percentage']
medium_share = heightDataset['Percent Medium Damage'].astype(float)
sns.lineplot(x=height_values, y=medium_share, color='xkcd:blue', ax=ax)
ax.set(
    xlabel='Building Height',
    ylabel='Percent of Buildings with Medium Damage',
    title='% Medium Damage by Building Height Percentage',
);
# Medium-damage share against height percentage, restricted to <= 20.
fig, ax = plt.subplots(figsize=(10, 10))
height_values = normalisedHeightDataset['Height Percentage']
medium_share = normalisedHeightDataset['Percent Medium Damage'].astype(float)
sns.lineplot(x=height_values, y=medium_share, ax=ax)
ax.set(
    xlabel='Normalized Height of the Building Footprint',
    title='% Medium Damage by Normalized Height of Building Footprint',
);
#Plotting height percentage vs percent low damage
fig, ax = plt.subplots(figsize=(10, 10))
height_values = heightDataset['Height Percentage']
low_share = heightDataset['Percent Low Damage'].astype(float)
sns.lineplot(x=height_values, y=low_share, color='xkcd:blue', ax=ax)
ax.set(
    xlabel='Building Height',
    ylabel='Percent of Buildings with Low Damage',
    title='% Low Damage by Building Height Percentage',
);
# Low-damage share against height percentage, restricted to <= 20.
fig, ax = plt.subplots(figsize=(10, 10))
height_values = normalisedHeightDataset['Height Percentage']
low_share = normalisedHeightDataset['Percent Low Damage'].astype(float)
sns.lineplot(x=height_values, y=low_share, ax=ax)
ax.set(
    xlabel='Normalized Height of the Building Footprint',
    title='% Low Damage by Normalized Height of Building Footprint',
);
From the initial analysis, the normalized height of the building and the percentage of buildings at each damage level appear unrelated. However, further analysis of the distribution of the feature showed that only 111 of the 260601 entries have a height percentage over 20. Dropping these values from the visualization shows a trend where buildings with a height percentage up to around 19 are less likely to be completely destroyed in the event of an earthquake. This is also reflected in the normalized percent low damage graphs. The percent medium damage graphs, however, do not reveal any significant trend
# Intermediary dataset: for every distinct area_percentage value, the
# percentage of buildings that fall into each of the three damage grades.
grade_columns = {
    1: 'Percent Low Damage',
    2: 'Percent Medium Damage',
    3: 'Percent Complete Destruction',
}
# Single pass over the data: rows are area values, columns are damage grades.
grade_counts = (
    pd.crosstab(finalDataset['area_percentage'], finalDataset['damage_grade'])
    .reindex(columns=list(grade_columns), fill_value=0)
)
# Denominator: every building that has the given area percentage.
buildings_per_area = finalDataset['area_percentage'].value_counts()
areaDataset = (
    grade_counts.div(buildings_per_area, axis=0)
    .mul(100)
    .rename(columns=grade_columns)
    # Keep the rows in order of first appearance in finalDataset.
    .reindex(finalDataset['area_percentage'].unique())
    .rename_axis(index='Area Percentage', columns=None)
    .reset_index()
)
#Plotting area percentage vs percent complete destruction
fig, ax = plt.subplots(figsize=(10, 10))
area_values = areaDataset['Area Percentage']
destroyed_share = areaDataset['Percent Complete Destruction'].astype(float)
sns.lineplot(x=area_values, y=destroyed_share, color='xkcd:blue', ax=ax)
ax.set(
    xlabel='Area of the Building Footprint',
    ylabel='Percent of Buildings Completely Destroyed',
    title='% Complete Destruction by Area of Building Footprint',
);
areaDataset['Area Percentage'].value_counts()
1 1
54 1
62 1
61 1
60 1
..
27 1
26 1
25 1
24 1
100 1
Name: Area Percentage, Length: 84, dtype: int64
# Histogram of area_percentage, with the x axis limited to 0-40.
# plt.plot() with no data ignores figsize; plt.figure() is what sizes the figure.
plt.figure(figsize=(25, 15))
plt.xlim(0, 40)
sns.histplot(x=finalDataset['area_percentage'])
<AxesSubplot:xlabel='area_percentage', ylabel='Count'>
# Coarse 5-bin histogram of the area percentage values.
area_values = finalDataset['area_percentage']
plt.hist(area_values, bins=5, edgecolor='black')
plt.xlabel('area percentage')
plt.ylabel('count')
Text(0, 0.5, 'count')
len(finalDataset[finalDataset.area_percentage > 30])
992
# Fewer than 1000 entries have an area percentage above 30, so keep the
# aggregated rows up to 30 for plotting.
within_range = areaDataset['Area Percentage'] <= 30
normalisedAreaDataset = areaDataset[within_range]
# Row counts before and after applying the same cut-off to the raw data.
len(finalDataset), len(finalDataset[finalDataset.area_percentage <= 30])
(260601, 259609)
# Complete-destruction share against area percentage, restricted to <= 30.
fig, ax = plt.subplots(figsize=(10, 10))
area_values = normalisedAreaDataset['Area Percentage']
destroyed_share = normalisedAreaDataset['Percent Complete Destruction'].astype(float)
sns.lineplot(x=area_values, y=destroyed_share, color='xkcd:blue', ax=ax)
ax.set(
    xlabel='Normalized Area of the Building Footprint',
    ylabel='Percent of Buildings Completely Destroyed',
    title='% Complete Destruction by Normalized Area of Building Footprint',
);
The same operations were carried out for low and medium percent damages
#Plotting area percentage vs percent medium damage
fig, ax = plt.subplots(figsize=(10, 10))
area_values = areaDataset['Area Percentage']
medium_share = areaDataset['Percent Medium Damage'].astype(float)
sns.lineplot(x=area_values, y=medium_share, color='xkcd:blue', ax=ax)
ax.set(
    xlabel='Area of the Building Footprint',
    ylabel='Percent of Buildings with Medium Damage',
    title='% Medium Damage by Area of Building Footprint',
);
# Medium-damage share against area percentage, restricted to <= 30.
fig, ax = plt.subplots(figsize=(10, 10))
area_values = normalisedAreaDataset['Area Percentage']
medium_share = normalisedAreaDataset['Percent Medium Damage'].astype(float)
sns.lineplot(x=area_values, y=medium_share, color='xkcd:blue', ax=ax)
ax.set(
    xlabel='Normalized Area of the Building Footprint',
    ylabel='Percent of Buildings with Medium Damage',
    title='% Medium Damage by Normalized Area of Building Footprint',
);
#Plotting area percentage vs percent low damage
fig, ax = plt.subplots(figsize=(10, 10))
area_values = areaDataset['Area Percentage']
low_share = areaDataset['Percent Low Damage'].astype(float)
sns.lineplot(x=area_values, y=low_share, color='xkcd:blue', ax=ax)
ax.set(
    xlabel='Area of the Building Footprint',
    ylabel='Percent of Buildings with Low Damage',
    title='% Low Damage by Area of Building Footprint',
);
# Low-damage share against area percentage, restricted to <= 30.
fig, ax = plt.subplots(figsize=(10, 10))
area_values = normalisedAreaDataset['Area Percentage']
low_share = normalisedAreaDataset['Percent Low Damage'].astype(float)
sns.lineplot(x=area_values, y=low_share, color='xkcd:blue', ax=ax)
ax.set(
    xlabel='Normalized Area of the Building Footprint',
    ylabel='Percent of Buildings with Low Damage',
    title='% Low Damage by Normalized Area of Building Footprint',
);
Similar to the height percentage, initial visualization and analysis of the data show no major trends. However, once again the distribution shows that only 992 of the 260601 entries have a normalized area percentage of more than 30.
Removal of these values shows a clear downward trend in which the risk of a building being completely destroyed reduces for area percentages below 30, and seems to increase and fluctuate at higher values. As with the height percent graphs in RQ4, these findings are mirrored in the normalized percent low damage graphs, which increase over the same range of 0 to 30.
#analyze secondary use vs damage grade
secondaryCols = [
    name for name in finalDataset.columns
    if name.startswith('has_secondary')
]
# One count plot per secondary-use indicator column, split by damage grade.
for use_col in secondaryCols:
    plt.figure(figsize=(15, 5))
    ax = sns.countplot(
        x=finalDataset[use_col],
        hue=finalDataset.damage_grade,
        palette="magma",
    )
    # Annotate every bar with its count.
    for bars in ax.containers:
        ax.bar_label(bars)
    plt.title(use_col + " VS Damage", fontweight="bold")
    plt.legend(["Low damage", "Avg damage", "High damage"])
    plt.show()
# Print the 0/1 split of each secondary-use indicator, then the total row count.
for use_col in secondaryCols:
    split = finalDataset[use_col].value_counts()
    print(split)
    print(split.sum())
0 231445 1 29156 Name: has_secondary_use, dtype: int64 260601 0 243824 1 16777 Name: has_secondary_use_agriculture, dtype: int64 260601 0 251838 1 8763 Name: has_secondary_use_hotel, dtype: int64 260601 0 258490 1 2111 Name: has_secondary_use_rental, dtype: int64 260601 0 260356 1 245 Name: has_secondary_use_institution, dtype: int64 260601 0 260507 1 94 Name: has_secondary_use_school, dtype: int64 260601 0 260322 1 279 Name: has_secondary_use_industry, dtype: int64 260601 0 260552 1 49 Name: has_secondary_use_health_post, dtype: int64 260601 0 260563 1 38 Name: has_secondary_use_gov_office, dtype: int64 260601 0 260578 1 23 Name: has_secondary_use_use_police, dtype: int64 260601 0 259267 1 1334 Name: has_secondary_use_other, dtype: int64 260601
# Drop the aggregate 'has_secondary_use' flag so only the specific secondary-use
# columns remain. Filtering (instead of list.remove) keeps this cell safe to
# re-run: remove() raises ValueError once the entry is already gone.
secondaryCols = [col for col in secondaryCols if col != 'has_secondary_use']
# Number of buildings with each secondary use (indicator == 1). Built in one
# step so 'frequency' is an integer column instead of object dtype.
secondaryCountDataset = pd.DataFrame({
    'secondaryuse': secondaryCols,
    'frequency': [int((finalDataset[col] == 1).sum()) for col in secondaryCols],
})
secondaryCountDataset
| secondaryuse | frequency | |
|---|---|---|
| 0 | has_secondary_use_agriculture | 16777 |
| 1 | has_secondary_use_hotel | 8763 |
| 2 | has_secondary_use_rental | 2111 |
| 3 | has_secondary_use_institution | 245 |
| 4 | has_secondary_use_school | 94 |
| 5 | has_secondary_use_industry | 279 |
| 6 | has_secondary_use_health_post | 49 |
| 7 | has_secondary_use_gov_office | 38 |
| 8 | has_secondary_use_use_police | 23 |
| 9 | has_secondary_use_other | 1334 |
# Pie chart of how often each secondary use occurs.
secondaryCountDataset['frequency'].plot.pie(
    autopct="%.1f",
    labels=secondaryCountDataset['secondaryuse'],
    figsize=(10, 10),
)
<AxesSubplot:ylabel='frequency'>
# Placeholder frame: one row per secondary-use flag, holding the percentage of
# flagged buildings found at each damage grade
secondaryDataset = pd.DataFrame(columns=['secondary_use', 'Low Damage', 'Medium Damage', 'Complete Destruction'])
for row, col in enumerate(secondaryCols):
    flagged = finalDataset[finalDataset[col] == 1]
    # Number of flagged buildings recorded at damage grades 1, 2 and 3
    low, medium, destroyed = (len(flagged[flagged['damage_grade'] == grade]) for grade in (1, 2, 3))
    graded = low + medium + destroyed
    secondaryDataset.loc[row, 'secondary_use'] = col
    secondaryDataset.loc[row, 'Low Damage'] = low / graded * 100
    secondaryDataset.loc[row, 'Medium Damage'] = medium / graded * 100
    secondaryDataset.loc[row, 'Complete Destruction'] = destroyed / graded * 100
secondaryDataset.head()
| secondary_use | Low Damage | Medium Damage | Complete Destruction | |
|---|---|---|---|---|
| 0 | has_secondary_use_agriculture | 4.941289 | 63.65262 | 31.406092 |
| 1 | has_secondary_use_hotel | 25.288143 | 57.719959 | 16.991898 |
| 2 | has_secondary_use_rental | 41.544292 | 49.786831 | 8.668877 |
| 3 | has_secondary_use_institution | 40.408163 | 52.653061 | 6.938776 |
| 4 | has_secondary_use_school | 31.914894 | 50.0 | 18.085106 |
# Order the flags from highest to lowest share of complete destruction; the
# old row labels are kept as an 'index' column by reset_index()
secondaryDatasetSorted = secondaryDataset.sort_values(by='Complete Destruction', ascending=False).reset_index()
# Readable tick labels: drop the first three '_'-separated pieces
# ("has", "secondary", "use") of each column name and title-case the rest
formattedList = [
    ' '.join(flagName.split('_')[3:]).title()
    for flagName in secondaryDatasetSorted.secondary_use
]
# Draw one bar chart per damage level. The three charts differ only in the
# plotted column and the title, so they share a single loop body instead of
# three copies of the same code.
damageChartTitles = {
    'Complete Destruction': 'Percent Completely Destroyed (By Secondary Use)',
    'Medium Damage': 'Percent Medium Damage (By Secondary Use)',
    'Low Damage': 'Percent Low Damage (By Secondary Use)',
}
for damageColumn, chartTitle in damageChartTitles.items():
    fig, ax = plt.subplots(figsize=(20, 10))
    sns.barplot(x='secondary_use', y=damageColumn, data=secondaryDatasetSorted,
                palette="magma", ax=ax)
    ax.set_ylabel('Percent', fontsize=25)
    ax.tick_params(axis='y', labelsize=20)
    ax.set_xlabel('Secondary Use', fontsize=25)
    # formattedList is built from secondaryDatasetSorted.secondary_use, the
    # same column that supplies the x categories here
    ax.set_xticklabels(formattedList, rotation=90, fontsize=20)
    ax.set_title(chartTitle, fontsize=30)
The data shows that the majority of buildings with a secondary use are used for agriculture (56.5%) and as hotels (29.5%). Agricultural buildings are also seen to suffer the highest percentage of complete destruction and high levels of medium damage. This is likely due to the open locations and large areas that these buildings are generally found in.
The data shows that the majority of buildings with a secondary use are used for agriculture (56.5%) and as hotels (29.5%). Agricultural buildings are also seen to suffer the highest percentage of complete destruction and high levels of medium damage. This is likely due to the open locations and large areas that these buildings are generally found in.
Buildings with police-station and industrial secondary uses also seem to be largely affected by high and medium levels of damage. This is likely due to the heavy use and wear-and-tear of these buildings. Machines operating in industrial areas may produce vibrations that affect the buildings. Police stations are also found in metropolitan areas with heavy foot traffic.
Another point to note is that schools seem to be affected by a high percentage of medium damage. Since the lives of children are at stake in the case of an earthquake, measures would need to be taken to bring this down to low or no damage at all.
# Number of buildings at each age, split by damage grade
plt.figure(figsize=(15, 10))
sns.countplot(data=finalDataset, x="age", hue="damage_grade", palette="magma")
<AxesSubplot:xlabel='age', ylabel='count'>
# Build a frame holding, for each age from 0 to 100 in steps of 5, the
# percentage of buildings of that age at each damage grade
ageDataset = pd.DataFrame(columns=['age', 'Low Damage', 'Medium Damage', 'Complete Destruction'])
for row, age in enumerate(range(0, 105, 5)):
    sameAge = finalDataset.age == age
    # Buildings of this age recorded at damage grades 1, 2 and 3
    low = len(finalDataset[sameAge & (finalDataset.damage_grade == 1)])
    medium = len(finalDataset[sameAge & (finalDataset.damage_grade == 2)])
    destroyed = len(finalDataset[sameAge & (finalDataset.damage_grade == 3)])
    graded = low + medium + destroyed
    ageDataset.loc[row, 'age'] = age
    ageDataset.loc[row, 'Low Damage'] = low / graded * 100
    ageDataset.loc[row, 'Medium Damage'] = medium / graded * 100
    ageDataset.loc[row, 'Complete Destruction'] = destroyed / graded * 100
ageDataset
| age | Low Damage | Medium Damage | Complete Destruction | |
|---|---|---|---|---|
| 0 | 0 | 27.698629 | 48.849891 | 23.45148 |
| 1 | 5 | 16.589014 | 56.47387 | 26.937116 |
| 2 | 10 | 11.209379 | 57.512341 | 31.278281 |
| 3 | 15 | 7.992224 | 57.670092 | 34.337684 |
| 4 | 20 | 6.071717 | 57.805606 | 36.122677 |
| 5 | 25 | 4.641714 | 57.75671 | 37.601576 |
| 6 | 30 | 3.550033 | 57.932106 | 38.517861 |
| 7 | 35 | 3.286648 | 58.319328 | 38.394024 |
| 8 | 40 | 2.680178 | 57.656975 | 39.662847 |
| 9 | 45 | 2.54723 | 59.477818 | 37.974952 |
| 10 | 50 | 2.342566 | 57.268844 | 40.38859 |
| 11 | 55 | 2.213478 | 59.763896 | 38.022627 |
| 12 | 60 | 2.104097 | 58.554817 | 39.341085 |
| 13 | 65 | 1.424755 | 61.887801 | 36.687444 |
| 14 | 70 | 1.620253 | 60.253165 | 38.126582 |
| 15 | 75 | 1.5625 | 61.914062 | 36.523438 |
| 16 | 80 | 0.91653 | 57.577741 | 41.505728 |
| 17 | 85 | 0.826446 | 65.053129 | 34.120425 |
| 18 | 90 | 1.013825 | 63.133641 | 35.852535 |
| 19 | 95 | 2.657005 | 58.937198 | 38.405797 |
| 20 | 100 | 0.733138 | 66.788856 | 32.478006 |
# Reshape to long form: one row per (age, damage level) pair, with the level
# name in 'variable' and its percentage in 'value'
ageDatasetMelted = pd.melt(ageDataset, id_vars='age')
ageDatasetMelted
| age | variable | value | |
|---|---|---|---|
| 0 | 0 | Low Damage | 27.698629 |
| 1 | 5 | Low Damage | 16.589014 |
| 2 | 10 | Low Damage | 11.209379 |
| 3 | 15 | Low Damage | 7.992224 |
| 4 | 20 | Low Damage | 6.071717 |
| ... | ... | ... | ... |
| 58 | 80 | Complete Destruction | 41.505728 |
| 59 | 85 | Complete Destruction | 34.120425 |
| 60 | 90 | Complete Destruction | 35.852535 |
| 61 | 95 | Complete Destruction | 38.405797 |
| 62 | 100 | Complete Destruction | 32.478006 |
63 rows × 3 columns
# Grouped bars: percentage of buildings at each damage level, per building age
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(data=ageDatasetMelted, x='age', y='value', hue='variable', palette="magma", ax=ax)
ax.set(
    xlabel='Age (years)',
    ylabel='Percent',
    title='Percent of Buildings Damaged by Building Age',
)
ax.legend(title='');
Initial visualization of the data shows a right-skewed distribution for high and medium damage counts, while low damage counts seem to steadily decline with building age, indicating that buildings become more susceptible to damage with age.
The percentage damage visualizations show a trend of newer buildings having a lower percentage of complete destruction. This trend continues until the building is around 35 years old and then plateaus with slight fluctuations. This is also reflected in the percentage of low damage in buildings, which drops sharply over the same 0 to 30 year period.
# Number of buildings for each pre-earthquake floor count, split by damage grade
plt.figure(figsize=(15, 10))
sns.countplot(data=finalDataset, x="count_floors_pre_eq", hue="damage_grade", palette="magma")
<AxesSubplot:xlabel='count_floors_pre_eq', ylabel='count'>
# Build a frame holding, for each floor count from 1 to 7, the percentage of
# buildings with that many floors at each damage grade
floorDataset = pd.DataFrame(columns=['floors', 'Low Damage', 'Medium Damage', 'Complete Destruction'])
for row, floor in enumerate(range(1, 8)):
    sameFloors = finalDataset.count_floors_pre_eq == floor
    # Buildings with this floor count recorded at damage grades 1, 2 and 3
    low = len(finalDataset[sameFloors & (finalDataset.damage_grade == 1)])
    medium = len(finalDataset[sameFloors & (finalDataset.damage_grade == 2)])
    destroyed = len(finalDataset[sameFloors & (finalDataset.damage_grade == 3)])
    graded = low + medium + destroyed
    floorDataset.loc[row, 'floors'] = floor
    floorDataset.loc[row, 'Low Damage'] = low / graded * 100
    floorDataset.loc[row, 'Medium Damage'] = medium / graded * 100
    floorDataset.loc[row, 'Complete Destruction'] = destroyed / graded * 100
floorDataset
| floors | Low Damage | Medium Damage | Complete Destruction | |
|---|---|---|---|---|
| 0 | 1 | 22.177988 | 51.428006 | 26.394006 |
| 1 | 2 | 8.040326 | 59.996297 | 31.963377 |
| 2 | 3 | 4.561555 | 51.205567 | 44.232878 |
| 3 | 4 | 13.034661 | 62.205015 | 24.760324 |
| 4 | 5 | 11.308994 | 65.939448 | 22.751558 |
| 5 | 6 | 25.358852 | 63.636364 | 11.004785 |
| 6 | 7 | 25.641026 | 64.102564 | 10.25641 |
# Reshape to long form: one row per (floor count, damage level) pair, with the
# level name in 'variable' and its percentage in 'value'
meltedFloorDataset = pd.melt(floorDataset, id_vars='floors')
meltedFloorDataset
| floors | variable | value | |
|---|---|---|---|
| 0 | 1 | Low Damage | 22.177988 |
| 1 | 2 | Low Damage | 8.040326 |
| 2 | 3 | Low Damage | 4.561555 |
| 3 | 4 | Low Damage | 13.034661 |
| 4 | 5 | Low Damage | 11.308994 |
| 5 | 6 | Low Damage | 25.358852 |
| 6 | 7 | Low Damage | 25.641026 |
| 7 | 1 | Medium Damage | 51.428006 |
| 8 | 2 | Medium Damage | 59.996297 |
| 9 | 3 | Medium Damage | 51.205567 |
| 10 | 4 | Medium Damage | 62.205015 |
| 11 | 5 | Medium Damage | 65.939448 |
| 12 | 6 | Medium Damage | 63.636364 |
| 13 | 7 | Medium Damage | 64.102564 |
| 14 | 1 | Complete Destruction | 26.394006 |
| 15 | 2 | Complete Destruction | 31.963377 |
| 16 | 3 | Complete Destruction | 44.232878 |
| 17 | 4 | Complete Destruction | 24.760324 |
| 18 | 5 | Complete Destruction | 22.751558 |
| 19 | 6 | Complete Destruction | 11.004785 |
| 20 | 7 | Complete Destruction | 10.25641 |
# Grouped bars: percentage of buildings at each damage level, per floor count
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(data=meltedFloorDataset, x='floors', y='value', hue='variable', palette="magma", ax=ax)
ax.set(
    xlabel='Number of Floors',
    ylabel='Percent',
    title='Percent of Buildings Damaged by Number of Floors',
)
ax.legend(title='');
The figure above shows a trend where the percentage of buildings completely destroyed increases with the number of floors up to 3 floors. Beyond that, there is a steady drop in buildings completely destroyed, which reaches its lowest percentage for buildings with 7 floors. This is likely due to taller buildings requiring and having stronger foundations and construction, leading to lower damage.
The percentage of buildings with medium damage shows an overall increase with the number of floors, with slight fluctuations, showing that despite surviving complete destruction, taller buildings are still susceptible to medium damage.
# One box plot per geo level: spread of that level's ids within each damage grade
fig, axes = plt.subplots(ncols=3, figsize=(20, 5))
geoLevelColumns = ['geo_level_1_id', 'geo_level_2_id', 'geo_level_3_id']
for panel, geoColumn in zip(axes, geoLevelColumns):
    sns.boxplot(y=finalDataset[geoColumn], x=finalDataset['damage_grade'], ax=panel)
#doesn't seem to be any connection between geo level and damage grade
<AxesSubplot:xlabel='damage_grade', ylabel='geo_level_3_id'>
#pre-encoded "mergedDataset" is used for this operation
categoricalCols = mergedDataset.select_dtypes(include="object")
font = {'weight': 'bold', 'size': 16}

plt.figure(figsize=(20, 20))
# Iterating a DataFrame yields its column labels; each column gets the next
# slot of a 3x3 grid, numbered from 1
for q, j in enumerate(categoricalCols, start=1):
    plt.subplot(3, 3, q)
    ax = sns.countplot(x=mergedDataset[j].dropna(), palette="Paired", hue=mergedDataset["damage_grade"])
    plt.xticks(**font)
    plt.yticks(**font)
    plt.xlabel(j)
    plt.legend(["Low damage", "Avg damage", "High damage"])
plt.show()
# Columns of finalDataset whose name starts with 'plan_configuration'
planConfigCols = [name for name in finalDataset.columns if name.startswith('plan_configuration')]
planConfigCols
['plan_configuration_a', 'plan_configuration_c', 'plan_configuration_d', 'plan_configuration_f', 'plan_configuration_m', 'plan_configuration_n', 'plan_configuration_o', 'plan_configuration_q', 'plan_configuration_s', 'plan_configuration_u']
#analyze plan configuration vs damage grade
# One figure per plan-configuration column: 0/1 counts split by damage grade
for col in planConfigCols:
    plt.figure(figsize=(15, 5))
    sns.countplot(data=finalDataset, x=col, hue="damage_grade", palette="magma")
    plt.title(col + " VS Damage", fontweight="bold")
    plt.legend(["Low damage", "Avg damage", "High damage"])
    plt.show()
# Columns of finalDataset whose name starts with 'foundation_type'
foundationTypeCols = [name for name in finalDataset.columns if name.startswith('foundation_type')]
foundationTypeCols
['foundation_type_h', 'foundation_type_i', 'foundation_type_r', 'foundation_type_u', 'foundation_type_w']
#analyze and visualize foundation type vs damage grade
# One figure per foundation-type column: 0/1 counts split by damage grade
for col in foundationTypeCols:
    plt.figure(figsize=(15, 5))
    sns.countplot(data=finalDataset, x=col, hue="damage_grade", palette="magma")
    plt.title(col + " VS Damage", fontweight="bold")
    plt.legend(["Low damage", "Avg damage", "High damage"])
    plt.show()
# Columns of finalDataset whose name starts with 'other_floor_type'
otherFloorTypeCols = [name for name in finalDataset.columns if name.startswith('other_floor_type')]
otherFloorTypeCols
['other_floor_type_j', 'other_floor_type_q', 'other_floor_type_s', 'other_floor_type_x']
#analyze and visualize other floor type vs damage grade
# One figure per other-floor-type column: 0/1 counts split by damage grade
for col in otherFloorTypeCols:
    plt.figure(figsize=(15, 5))
    sns.countplot(data=finalDataset, x=col, hue="damage_grade", palette="magma")
    plt.title(col + " VS Damage", fontweight="bold")
    plt.legend(["Low damage", "Avg damage", "High damage"])
    plt.show()
# Columns of finalDataset whose name starts with 'roof_type'
roofTypeCols = [name for name in finalDataset.columns if name.startswith('roof_type')]
roofTypeCols
['roof_type_n', 'roof_type_q', 'roof_type_x']
#analyze and visualize roof type vs damage grade
# One figure per roof-type column: 0/1 counts split by damage grade
for col in roofTypeCols:
    plt.figure(figsize=(15, 5))
    sns.countplot(data=finalDataset, x=col, hue="damage_grade", palette="magma")
    plt.title(col + " VS Damage", fontweight="bold")
    plt.legend(["Low damage", "Avg damage", "High damage"])
    plt.show()
# Columns of finalDataset whose name starts with 'position'
positionCols = [name for name in finalDataset.columns if name.startswith('position')]
positionCols
['position_j', 'position_o', 'position_s', 'position_t']
#analyze and visualize position vs damage grade
# One figure per position column: 0/1 counts split by damage grade
for col in positionCols:
    plt.figure(figsize=(15, 5))
    sns.countplot(data=finalDataset, x=col, hue="damage_grade", palette="magma")
    plt.title(col + " VS Damage", fontweight="bold")
    plt.legend(["Low damage", "Avg damage", "High damage"])
    plt.show()
# Bar chart of the total family count per damage grade, bars ordered by
# ascending total and each annotated with its share of all families
plt.figure(figsize=(10, 8))
familiesByGrade = finalDataset.groupby("damage_grade")["count_families"].sum().sort_values()
ax = familiesByGrade.plot.bar(color=["mediumturquoise", "turquoise", "aquamarine"])

# Combined bar height is the denominator for every percentage label
total = sum(bar.get_height() for bar in ax.patches)
for bar in ax.patches:
    shareLabel = str(round((bar.get_height() / total) * 100, 2)) + '%'
    # Place the label slightly right of the bar's left edge, just above its top
    ax.text(bar.get_x() + .12, bar.get_height() + 5, shareLabel,
            fontsize=15, color='black')

plt.title("Families Affected due to earthquake")
plt.ylabel("No. of families")
plt.xlabel("Damage Grade")
plt.show()
# Import required packages for model creation
import matplotlib.ticker as mtick # Axis visuals
from math import pi # Radar chart support
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler, FunctionTransformer, LabelEncoder, RobustScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import roc_auc_score, f1_score, confusion_matrix
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from nltk.classify.scikitlearn import SklearnClassifier
roc_auc_score
# Plot styling
plt.style.use('ggplot')

# Report the size of the modelling frame and preview its first rows
rowCount, columnCount = finalDataset.shape
print("Structure data has {} rows and {} columns".format(rowCount, columnCount))
finalDataset.head()
Structure data has 260601 rows and 70 columns
| building_id | geo_level_1_id | geo_level_2_id | geo_level_3_id | count_floors_pre_eq | age | area_percentage | height_percentage | has_superstructure_adobe_mud | has_superstructure_mud_mortar_stone | has_superstructure_stone_flag | has_superstructure_cement_mortar_stone | has_superstructure_mud_mortar_brick | has_superstructure_cement_mortar_brick | has_superstructure_timber | has_superstructure_bamboo | has_superstructure_rc_non_engineered | has_superstructure_rc_engineered | has_superstructure_other | count_families | has_secondary_use | has_secondary_use_agriculture | has_secondary_use_hotel | has_secondary_use_rental | has_secondary_use_institution | has_secondary_use_school | has_secondary_use_industry | has_secondary_use_health_post | has_secondary_use_gov_office | has_secondary_use_use_police | has_secondary_use_other | damage_grade | land_surface_condition_n | land_surface_condition_o | land_surface_condition_t | foundation_type_h | foundation_type_i | foundation_type_r | foundation_type_u | foundation_type_w | roof_type_n | roof_type_q | roof_type_x | ground_floor_type_f | ground_floor_type_m | ground_floor_type_v | ground_floor_type_x | ground_floor_type_z | other_floor_type_j | other_floor_type_q | other_floor_type_s | other_floor_type_x | position_j | position_o | position_s | position_t | plan_configuration_a | plan_configuration_c | plan_configuration_d | plan_configuration_f | plan_configuration_m | plan_configuration_n | plan_configuration_o | plan_configuration_q | plan_configuration_s | plan_configuration_u | legal_ownership_status_a | legal_ownership_status_r | legal_ownership_status_v | legal_ownership_status_w | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 802906 | 6 | 487 | 12198 | 2 | 30 | 6 | 5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 28830 | 8 | 900 | 2812 | 2 | 10 | 8 | 7 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 94947 | 21 | 363 | 8973 | 2 | 10 | 5 | 5 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | 590882 | 22 | 418 | 10694 | 2 | 10 | 6 | 5 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4 | 201944 | 11 | 131 | 1488 | 3 | 30 | 8 | 9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
# Inspect the dtype of every column in finalDataset
finalDataset.dtypes
building_id int64
geo_level_1_id int64
geo_level_2_id int64
geo_level_3_id int64
count_floors_pre_eq int64
...
plan_configuration_u uint8
legal_ownership_status_a uint8
legal_ownership_status_r uint8
legal_ownership_status_v uint8
legal_ownership_status_w uint8
Length: 70, dtype: object
# View missingness: null count per column, showing only columns that have any
df_temp = finalDataset.isna().sum().reset_index(name='count')
hasMissing = df_temp['count'] > 0
display(df_temp.loc[hasMissing])

# Drop Rows with missing data
finalDataset.dropna(inplace=True)
| index | count |
|---|
# Count of buildings per damage grade, with thousands separators on the y axis
plt.figure(figsize=(12, 5))
ax = sns.countplot(data=finalDataset, x='damage_grade', order=[1, 2, 3])
ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:,.0f}'))
ax.set_title("Distribution of Damage Grade")
ax.set_xlabel("Damage Grading")
plt.show()
In this section, the following predictive models are built: KNN, Random Forest, and Gradient Boosted Machines. The goal is to identify the model that best predicts the damage grade of new data given the input features. In this section, we are configuring a preprocessing pipeline to prepare the training data. The test data set is held out from the main data, which gives us the opportunity to evaluate the model's performance on completely unseen data. The model is evaluated against the test data using the F1 score as the evaluation metric.
# Create training and testing data: a shuffled 85/15 split, stratified on the
# target so both parts keep the same damage-grade proportions
# NOTE(review): every column except damage_grade is used as a feature, which
# includes building_id - confirm an identifier column is meant to be a model input
features = finalDataset.drop(columns='damage_grade')
target = finalDataset['damage_grade']
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.15,
    random_state=1989,
    stratify=target,
    shuffle=True,
)
# Visualise proportions on train and test: share of each damage grade per split.
# The table is assembled directly from the two value_counts() results rather
# than merged on an "index" column, because the column names produced by
# value_counts().reset_index() differ between pandas 1.x and 2.x and the
# on="index" merge raises KeyError on 2.x.
trainShare = y_train.value_counts(normalize=True)
# Align test shares to the train ordering; a grade missing from the test split
# becomes NaN, matching the original left join
testShare = y_test.value_counts(normalize=True).reindex(trainShare.index)
pd.DataFrame({
    "index": trainShare.index,
    "train": trainShare.to_numpy(),
    "test": testShare.to_numpy(),
})
| index | train | test | |
|---|---|---|---|
| 0 | 2 | 0.568913 | 0.568903 |
| 1 | 3 | 0.334680 | 0.334681 |
| 2 | 1 | 0.096406 | 0.096416 |
A 5-fold cross-validation strategy is used, which guards against over-fitting to a single split and allows fair comparisons between competing models. This method divides the data into 5 random partitions and creates 5 models. For each model, 4 data partitions are used to train the model and the fifth is used for testing.
# Cross Validation Strategy
# Five shuffled folds with a fixed seed, so every model is scored on the same partitions
kfold = KFold(n_splits=5, shuffle=True, random_state=1989)

# List of classification models.
# The tree ensembles are seeded like the split and the folds; without a seed
# their scores change on every run and cannot be reproduced.
# NOTE(review): KNN is fit on the features exactly as they come out of the
# split (unscaled, identifier columns included) - confirm this is intended,
# since distance-based models are sensitive to feature scale.
classifiers = [
    ('KNN', KNeighborsClassifier(n_neighbors=3)),
    ('RF', RandomForestClassifier(random_state=1989)),
    ('GBM', GradientBoostingClassifier(random_state=1989)),
]

# Evaluate each model: per-fold micro-averaged F1 on the training split
results = []
names = []
for name, model in classifiers:
    cv_results = cross_val_score(model, x_train, y_train, cv=kfold, scoring='f1_micro')
    results.append(cv_results)
    names.append(name)
    # Mean score across folds, with the standard deviation in parentheses
    print("%s: %f (%f)" % (name, cv_results.mean(), cv_results.std()))
KNN: 0.447533 (0.002719) RF: 0.713828 (0.001642) GBM: 0.680046 (0.002064)
All three models are evaluated on the same 5 folds of data, and the following output shows how each model performs on the F1 score evaluation metric. The statistics provide a summary assessment of how well each model performs overall across all folds.
KNN model: performed worst, achieving an F1 score of 44.7%, with a low standard deviation indicating similar performance across the 5 folds.
Random Forest model: the F1 score for this model is 71.4%, an improvement of more than 25 percentage points over the simple KNN model. The low standard deviation of the model also shows similar performance across folds.
Gradient Boosted Machine model: this model has an F1 score of 68%, which is weaker than Random Forest but much stronger than the KNN model.
# Summarise scores: one row per fold, one column per model, with the fold
# number exposed as an 'index' column
foldScores = np.column_stack(results)
pd.DataFrame(foldScores, columns=names).reset_index()
| index | KNN | RF | GBM | |
|---|---|---|---|---|
| 0 | 0 | 0.446639 | 0.711345 | 0.678728 |
| 1 | 1 | 0.444269 | 0.712609 | 0.683039 |
| 2 | 2 | 0.447587 | 0.715521 | 0.677915 |
| 3 | 3 | 0.452508 | 0.715521 | 0.682001 |
| 4 | 4 | 0.446662 | 0.714144 | 0.678547 |
Only the random forest model is taken forward: it is refit on the full training set and then used to predict the damage grade of the test data. The F1 score is again used to assess how well the model predicts the grades.
# Build the random forest on the full training data and score it on the test split
rf = RandomForestClassifier(random_state=1989)  # seeded so the fit is reproducible
rf.fit(x_train, y_train)  # fit model
y_pred = rf.predict(x_test)  # predict on test

# Calculate confusion matrix (rows = actual grade, columns = predicted grade)
con_mat = confusion_matrix(y_test, y_pred, labels=rf.classes_)
# Normalise each row by its own total so a row reads as "share of this actual
# grade predicted as each grade". keepdims=True keeps the totals as a column
# vector; a flat vector would broadcast across columns and divide column j by
# the total of row j. True division already yields floats, so no cast through
# the removed np.float alias is needed.
con_mat = con_mat / con_mat.sum(axis=1, keepdims=True)

# Evaluate model
print("F1 Score: %f " % f1_score(y_test, y_pred, average='micro'))

# Plot Model
plt.figure(figsize=(12, 6))
gradeLabels = list(rf.classes_)
ax = sns.heatmap(con_mat, annot=True, xticklabels=gradeLabels, yticklabels=gradeLabels)
ax.set_xlabel("Predicted damage grade")
ax.set_ylabel("Actual damage grade")
plt.title("Confusion Matrix")
plt.show()

# Clear objects
del rf, y_pred, con_mat, ax
F1 Score: 0.718196
<ipython-input-127-e83694330d17>:8: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations con_mat = con_mat / con_mat.astype(np.float).sum(axis=1)